xref: /freebsd/contrib/xz/src/common/tuklib_mbstr_width.c (revision a8675d927bbde29d5a4dc6efddf2f0dc6d6d6983)
///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_mbstr_width.c
/// \brief      Calculate width of a multibyte string
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
12e0f0e66dSMartin Matuska 
13e0f0e66dSMartin Matuska #include "tuklib_mbstr.h"
14*a8675d92SXin LI #include <string.h>
15e0f0e66dSMartin Matuska 
16e0f0e66dSMartin Matuska #if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
17e0f0e66dSMartin Matuska #	include <wchar.h>
18e0f0e66dSMartin Matuska #endif
19e0f0e66dSMartin Matuska 
20e0f0e66dSMartin Matuska 
/// \brief      Get the width of a multibyte string in columns
///
/// \param      str     Null-terminated multibyte string
/// \param      bytes   If not NULL, *bytes is set to strlen(str). This is
///                     done before any validation, so it is set even when
///                     the function fails.
///
/// \return     When both mbrtowc() and wcwidth() are available, the sum of
///             the wcwidth() values of the characters in str, or (size_t)-1
///             if the string cannot be decoded, contains a character for
///             which wcwidth() is negative, or does not end in the initial
///             shift state. Otherwise simply strlen(str).
extern size_t
tuklib_mbstr_width(const char *str, size_t *bytes)
{
	const size_t str_size = strlen(str);
	if (bytes != NULL)
		*bytes = str_size;

#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	// Start decoding from the initial conversion state.
	mbstate_t mbstate;
	memset(&mbstate, 0, sizeof(mbstate));

	size_t columns = 0;

	// Decode the string one multibyte character at a time and
	// accumulate the width reported by wcwidth() for each of them.
	for (size_t pos = 0; pos < str_size; ) {
		wchar_t wch;
		const size_t consumed = mbrtowc(&wch, str + pos,
				str_size - pos, &mbstate);

		// Zero means a null character was decoded. The error
		// returns (size_t)-1 and (size_t)-2 are rejected by
		// the upper bound.
		if (consumed == 0 || consumed > str_size)
			return (size_t)-1;

		// A negative width is treated as a failure.
		const int cols = wcwidth(wch);
		if (cols < 0)
			return (size_t)-1;

		pos += consumed;
		columns += (size_t)cols;
	}

	// The string must end in the initial shift state so that the
	// caller can combine it with other strings without having to
	// worry about shift states.
	if (!mbsinit(&mbstate))
		return (size_t)-1;

	return columns;

#else
	// Without mbrtowc() and wcwidth() every byte is counted as
	// one column, so the width equals the length in bytes.
	return str_size;
#endif
}
66