xref: /freebsd/contrib/xz/src/common/tuklib_mbstr_width.c (revision e0f0e66dfeda9df4f104f48bd42d5a28d8ae631e)
1*e0f0e66dSMartin Matuska ///////////////////////////////////////////////////////////////////////////////
2*e0f0e66dSMartin Matuska //
3*e0f0e66dSMartin Matuska /// \file       tuklib_mbstr_width.c
4*e0f0e66dSMartin Matuska /// \brief      Calculate width of a multibyte string
5*e0f0e66dSMartin Matuska //
6*e0f0e66dSMartin Matuska //  Author:     Lasse Collin
7*e0f0e66dSMartin Matuska //
8*e0f0e66dSMartin Matuska //  This file has been put into the public domain.
9*e0f0e66dSMartin Matuska //  You can do whatever you want with this file.
10*e0f0e66dSMartin Matuska //
11*e0f0e66dSMartin Matuska ///////////////////////////////////////////////////////////////////////////////
12*e0f0e66dSMartin Matuska 
13*e0f0e66dSMartin Matuska #include "tuklib_mbstr.h"
14*e0f0e66dSMartin Matuska 
15*e0f0e66dSMartin Matuska #if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
16*e0f0e66dSMartin Matuska #	include <wchar.h>
17*e0f0e66dSMartin Matuska #endif
18*e0f0e66dSMartin Matuska 
19*e0f0e66dSMartin Matuska 
// Measure a null-terminated multibyte string.
//
// The length of str in bytes (excluding the terminating null) is stored
// in *bytes when bytes is not NULL; this happens before any validation.
//
// The return value is the sum of the wcwidth() results of the characters
// in str, or (size_t)-1 if the string cannot be decoded, contains
// a character for which wcwidth() returns a negative value, or doesn't
// end in the initial shift state. When mbrtowc() or wcwidth() isn't
// available, the length in bytes is returned as the width.
extern size_t
tuklib_mbstr_width(const char *str, size_t *bytes)
{
	const size_t total = strlen(str);
	if (bytes != NULL)
		*bytes = total;

#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	const size_t failure = (size_t)-1;

	// Start decoding from the initial conversion state.
	mbstate_t shift;
	memset(&shift, 0, sizeof(shift));

	size_t columns = 0;

	// Decode one multibyte character per iteration and add up
	// the widths reported by wcwidth().
	for (size_t pos = 0; pos < total; ) {
		wchar_t ch;
		const size_t used = mbrtowc(&ch, str + pos,
				total - pos, &shift);

		// Zero means that nothing was consumed. A value larger
		// than the string length cannot be a valid byte count;
		// this also covers the error return values of mbrtowc().
		if (used == 0 || used > total)
			return failure;

		const int cols = wcwidth(ch);
		if (cols < 0)
			return failure;

		pos += used;
		columns += cols;
	}

	// The string must end in the initial shift state so that the
	// caller can combine it with other strings without having to
	// worry about shift states.
	return mbsinit(&shift) ? columns : failure;

#else
	// Single-byte mode: every byte is counted as one column.
	return total;
#endif
}
65