xref: /freebsd/lib/libc/locale/multibyte.3 (revision 4cf49a43559ed9fdad601bdcccd2c55963008675)
1.\" Copyright (c) 1993
2.\"	The Regents of the University of California.  All rights reserved.
3.\"
4.\" This code is derived from software contributed to Berkeley by
5.\" Donn Seeley of BSDI.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. All advertising materials mentioning features or use of this software
16.\"    must display the following acknowledgement:
17.\"	This product includes software developed by the University of
18.\"	California, Berkeley and its contributors.
19.\" 4. Neither the name of the University nor the names of its contributors
20.\"    may be used to endorse or promote products derived from this software
21.\"    without specific prior written permission.
22.\"
23.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33.\" SUCH DAMAGE.
34.\"
35.\"	@(#)multibyte.3	8.1 (Berkeley) 6/4/93
36.\" $FreeBSD$
37.\"
38.Dd June 4, 1993
39.Dt MULTIBYTE 3
40.Os
41.Sh NAME
42.Nm mblen ,
43.Nm mbstowcs ,
44.Nm mbtowc ,
45.Nm wcstombs ,
46.Nm wctomb
47.Nd multibyte character support for C
48.Sh SYNOPSIS
49.Fd #include <stdlib.h>
50.Ft int
51.Fn mblen "const char *mbchar" "size_t nbytes"
52.Ft size_t
53.Fn mbstowcs "wchar_t *wcstring" "const char *mbstring" "size_t nwchars"
54.Ft int
55.Fn mbtowc "wchar_t *wcharp" "const char *mbchar" "size_t nbytes"
56.Ft size_t
57.Fn wcstombs "char *mbstring" "const wchar_t *wcstring" "size_t nbytes"
58.Ft int
59.Fn wctomb "char *mbchar" "wchar_t wchar"
60.Sh DESCRIPTION
61The basic elements of some written natural languages such as Chinese
62cannot be represented uniquely with single C
63.Va char Ns s .
64The C standard supports two different ways of dealing with
65extended natural language encodings,
66.Em wide
67characters and
68.Em multibyte
69characters.
70Wide characters are an internal representation
71which allows each basic element to map
72to a single object of type
73.Va wchar_t .
74Multibyte characters are used for input and output
75and code each basic element as a sequence of C
76.Va char Ns s .
77Individual basic elements may map into one or more
78.Pq up to Dv MB_LEN_MAX
79bytes in a multibyte character.
80.Pp
81The current locale
82.Pq Xr setlocale 3
83governs the interpretation of wide and multibyte characters.
84The locale category
85.Dv LC_CTYPE
86specifically controls this interpretation.
87The
88.Va wchar_t
89type is wide enough to hold the largest value
90in the wide character representations for all locales.
91.Pp
92Multibyte strings may contain
93.Sq shift
94indicators to switch to and from
95particular modes within the given representation.
96If explicit bytes are used to signal shifting,
97these are not recognized as separate characters
98but are lumped with a neighboring character.
99There is always a distinguished
100.Sq initial
101shift state.
102The
103.Fn mbstowcs
104and
105.Fn wcstombs
106functions assume that multibyte strings are interpreted
107starting from the initial shift state.
108The
109.Fn mblen ,
110.Fn mbtowc
111and
112.Fn wctomb
113functions maintain static shift state internally.
114A call with a null
115.Fa mbchar
116pointer returns nonzero if the current locale requires shift states,
117zero otherwise;
118if shift states are required, the shift state is reset to the initial state.
119The internal shift states are undefined after a call to
120.Fn setlocale
121with the
122.Dv LC_CTYPE
123or
124.Dv LC_ALL
125categories.
126.Pp
127For convenience in processing,
128the wide character with value 0
129.Pq the null wide character
130is recognized as the wide character string terminator,
131and the character with value 0
132.Pq the null byte
133is recognized as the multibyte character string terminator.
134Null bytes are not permitted within multibyte characters.
135.Pp
136The
137.Fn mblen
138function computes the length in bytes
139of a multibyte character
140.Fa mbchar .
141Up to
142.Fa nbytes
143bytes are examined.
144.Pp
145The
146.Fn mbtowc
147function converts a multibyte character
148.Fa mbchar
149into a wide character and stores the result
150in the object pointed to by
151.Fa wcharp .
152Up to
153.Fa nbytes
154bytes are examined.
155.Pp
156The
157.Fn wctomb
158function converts a wide character
159.Fa wchar
160into a multibyte character and stores
161the result in
162.Fa mbchar .
163The object pointed to by
164.Fa mbchar
165must be large enough to accommodate the multibyte character.
166.Pp
167The
168.Fn mbstowcs
169function converts a multibyte character string
170.Fa mbstring
171into a wide character string
172.Fa wcstring .
173No more than
174.Fa nwchars
175wide characters are stored.
176A terminating null wide character is appended if there is room.
177.Pp
178The
179.Fn wcstombs
180function converts a wide character string
181.Fa wcstring
182into a multibyte character string
183.Fa mbstring .
184Up to
185.Fa nbytes
186bytes are stored in
187.Fa mbstring .
188Partial multibyte characters at the end of the string are not stored.
189The multibyte character string is null terminated if there is room.
190.Sh RETURN VALUES
191If multibyte characters are not supported in the current locale,
192all of these functions will return \-1 if characters can be processed,
193otherwise 0.
194.Pp
195If
196.Fa mbchar
197is
198.Dv NULL ,
199the
200.Fn mblen ,
201.Fn mbtowc
202and
203.Fn wctomb
204functions return nonzero if shift states are supported,
205zero otherwise.
206If
207.Fa mbchar
208is valid,
209then these functions return
210the number of bytes processed in
211.Fa mbchar ,
212or \-1 if no multibyte character
213could be recognized or converted.
214.Pp
215The
216.Fn mbstowcs
217function returns the number of wide characters converted,
218not counting any terminating null wide character.
219The
220.Fn wcstombs
221function returns the number of bytes converted,
222not counting any terminating null byte.
223If any invalid multibyte characters are encountered,
224both functions return \-1.
225.Sh SEE ALSO
226.Xr mbrune 3 ,
227.Xr rune 3 ,
228.Xr setlocale 3 ,
229.Xr euc 4 ,
230.Xr utf2 4
231.Sh STANDARDS
232The
233.Fn mblen ,
234.Fn mbstowcs ,
235.Fn mbtowc ,
236.Fn wcstombs
237and
238.Fn wctomb
239functions conform to
240.St -ansiC .
241.Sh BUGS
242The current implementation does not support shift states.
243