xref: /illumos-gate/usr/src/common/smbsrv/smb_utf8.c (revision 7d1ffc32e5e72873791b96934af035e0f051fc14)
1da6c28aaSamw /*
2da6c28aaSamw  * CDDL HEADER START
3da6c28aaSamw  *
4da6c28aaSamw  * The contents of this file are subject to the terms of the
5da6c28aaSamw  * Common Development and Distribution License (the "License").
6da6c28aaSamw  * You may not use this file except in compliance with the License.
7da6c28aaSamw  *
8da6c28aaSamw  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9da6c28aaSamw  * or http://www.opensolaris.org/os/licensing.
10da6c28aaSamw  * See the License for the specific language governing permissions
11da6c28aaSamw  * and limitations under the License.
12da6c28aaSamw  *
13da6c28aaSamw  * When distributing Covered Code, include this CDDL HEADER in each
14da6c28aaSamw  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15da6c28aaSamw  * If applicable, add the following below this CDDL HEADER, with the
16da6c28aaSamw  * fields enclosed by brackets "[]" replaced with your own identifying
17da6c28aaSamw  * information: Portions Copyright [yyyy] [name of copyright owner]
18da6c28aaSamw  *
19da6c28aaSamw  * CDDL HEADER END
20da6c28aaSamw  */
21da6c28aaSamw /*
22bbf6f00cSJordan Brown  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23da6c28aaSamw  * Use is subject to license terms.
24b819cea2SGordon Ross  *
25*7d1ffc32SGordon Ross  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
26da6c28aaSamw  */
27da6c28aaSamw 
28da6c28aaSamw /*
29*7d1ffc32SGordon Ross  * Multibyte/wide-char conversion routines. SMB uses UTF-16 on the wire
30*7d1ffc32SGordon Ross  * (smb_wchar_t) and we use UTF-8 internally (our multi-byte, or mbs).
31da6c28aaSamw  */
32da6c28aaSamw 
33b819cea2SGordon Ross #if defined(_KERNEL) || defined(_FAKE_KERNEL)
34da6c28aaSamw #include <sys/types.h>
35da6c28aaSamw #include <sys/sunddi.h>
3607a6ae61SGordon Ross #else	/* _KERNEL || _FAKE_KERNEL */
37da6c28aaSamw #include <stdio.h>
38da6c28aaSamw #include <stdlib.h>
39da6c28aaSamw #include <strings.h>
4007a6ae61SGordon Ross #include <iconv.h>
4107a6ae61SGordon Ross #include <assert.h>
4207a6ae61SGordon Ross #endif	/* _KERNEL || _FAKE_KERNEL */
43*7d1ffc32SGordon Ross #include <sys/u8_textprep.h>
44da6c28aaSamw #include <smbsrv/string.h>
45da6c28aaSamw 
46da6c28aaSamw 
47da6c28aaSamw /*
48da6c28aaSamw  * mbstowcs
49da6c28aaSamw  *
50da6c28aaSamw  * The mbstowcs() function converts a multibyte character string
51da6c28aaSamw  * mbstring into a wide character string wcstring. No more than
52da6c28aaSamw  * nwchars wide characters are stored. A terminating null wide
53da6c28aaSamw  * character is appended if there is room.
54da6c28aaSamw  *
55da6c28aaSamw  * Returns the number of wide characters converted, not counting
56da6c28aaSamw  * any terminating null wide character. Returns -1 if an invalid
57da6c28aaSamw  * multibyte character is encountered.
58da6c28aaSamw  */
59da6c28aaSamw size_t
smb_mbstowcs(smb_wchar_t * wcs,const char * mbs,size_t nwchars)60*7d1ffc32SGordon Ross smb_mbstowcs(smb_wchar_t *wcs, const char *mbs, size_t nwchars)
61da6c28aaSamw {
62*7d1ffc32SGordon Ross 	size_t mbslen, wcslen;
63*7d1ffc32SGordon Ross 	int err;
64da6c28aaSamw 
65*7d1ffc32SGordon Ross 	/* NULL or empty input is allowed. */
66*7d1ffc32SGordon Ross 	if (mbs == NULL || *mbs == '\0') {
67*7d1ffc32SGordon Ross 		if (wcs != NULL && nwchars > 0)
68*7d1ffc32SGordon Ross 			*wcs = 0;
69*7d1ffc32SGordon Ross 		return (0);
70*7d1ffc32SGordon Ross 	}
71*7d1ffc32SGordon Ross 
72*7d1ffc32SGordon Ross 	/*
73*7d1ffc32SGordon Ross 	 * Traditional mbstowcs(3C) allows wcs==NULL to get the length.
74*7d1ffc32SGordon Ross 	 * SMB never calls it that way, but let's future-proof.
75*7d1ffc32SGordon Ross 	 */
76*7d1ffc32SGordon Ross 	if (wcs == NULL) {
77da6c28aaSamw 		return ((size_t)-1);
78da6c28aaSamw 	}
79da6c28aaSamw 
80*7d1ffc32SGordon Ross 	mbslen = strlen(mbs);
81*7d1ffc32SGordon Ross 	wcslen = nwchars;
82*7d1ffc32SGordon Ross 	err = uconv_u8tou16((const uchar_t *)mbs, &mbslen,
83*7d1ffc32SGordon Ross 	    wcs, &wcslen, UCONV_OUT_LITTLE_ENDIAN);
84*7d1ffc32SGordon Ross 	if (err != 0)
85*7d1ffc32SGordon Ross 		return ((size_t)-1);
86da6c28aaSamw 
87*7d1ffc32SGordon Ross 	if (wcslen < nwchars)
88*7d1ffc32SGordon Ross 		wcs[wcslen] = 0;
89da6c28aaSamw 
90*7d1ffc32SGordon Ross 	return (wcslen);
91da6c28aaSamw }
92da6c28aaSamw 
93da6c28aaSamw 
94da6c28aaSamw /*
95da6c28aaSamw  * mbtowc
96da6c28aaSamw  *
97da6c28aaSamw  * The mbtowc() function converts a multibyte character mbchar into
98da6c28aaSamw  * a wide character and stores the result in the object pointed to
99da6c28aaSamw  * by wcharp. Up to nbytes bytes are examined.
100da6c28aaSamw  *
101da6c28aaSamw  * If mbchar is NULL, mbtowc() returns zero to indicate that shift
10255bf511dSas200622  * states are not supported.  Shift states are used to switch between
10355bf511dSas200622  * representation modes using reserved bytes to signal shifting
10455bf511dSas200622  * without them being interpreted as characters.  If mbchar is null
10555bf511dSas200622  * mbtowc should return non-zero if the current locale requires shift
10655bf511dSas200622  * states.  Otherwise it should be return 0.
10755bf511dSas200622  *
10855bf511dSas200622  * If mbchar is non-null, returns the number of bytes processed in
109*7d1ffc32SGordon Ross  * mbchar.  If mbchar is null, convert the null (wcharp=0) but
110*7d1ffc32SGordon Ross  * return length zero.  If mbchar is invalid, returns -1.
111da6c28aaSamw  */
112da6c28aaSamw int /*ARGSUSED*/
smb_mbtowc(uint32_t * wcharp,const char * mbchar,size_t nbytes)113*7d1ffc32SGordon Ross smb_mbtowc(uint32_t *wcharp, const char *mbchar, size_t nbytes)
114da6c28aaSamw {
115*7d1ffc32SGordon Ross 	uint32_t wide_char;
116*7d1ffc32SGordon Ross 	int count, err;
117*7d1ffc32SGordon Ross 	size_t mblen;
118*7d1ffc32SGordon Ross 	size_t wclen;
119da6c28aaSamw 
12055bf511dSas200622 	if (mbchar == NULL)
12155bf511dSas200622 		return (0); /* no shift states */
122da6c28aaSamw 
123*7d1ffc32SGordon Ross 	/*
124*7d1ffc32SGordon Ross 	 * How many bytes in this symbol?
125*7d1ffc32SGordon Ross 	 */
126*7d1ffc32SGordon Ross 	count = u8_validate((char *)mbchar, nbytes, NULL, 0, &err);
127*7d1ffc32SGordon Ross 	if (count < 0)
128da6c28aaSamw 		return (-1);
129da6c28aaSamw 
130*7d1ffc32SGordon Ross 	mblen = count;
131*7d1ffc32SGordon Ross 	wclen = 1;
132*7d1ffc32SGordon Ross 	err = uconv_u8tou32((const uchar_t *)mbchar, &mblen,
133*7d1ffc32SGordon Ross 	    &wide_char, &wclen, UCONV_OUT_SYSTEM_ENDIAN);
134*7d1ffc32SGordon Ross 	if (err != 0)
135da6c28aaSamw 		return (-1);
136*7d1ffc32SGordon Ross 	if (wclen == 0) {
137*7d1ffc32SGordon Ross 		wide_char = 0;
138*7d1ffc32SGordon Ross 		count = 0;
139da6c28aaSamw 	}
140da6c28aaSamw 
141da6c28aaSamw 	if (wcharp)
142da6c28aaSamw 		*wcharp = wide_char;
143da6c28aaSamw 
144da6c28aaSamw 	return (count);
145da6c28aaSamw }
146da6c28aaSamw 
147da6c28aaSamw 
148da6c28aaSamw /*
149da6c28aaSamw  * wctomb
150da6c28aaSamw  *
151da6c28aaSamw  * The wctomb() function converts a wide character wchar into a multibyte
152da6c28aaSamw  * character and stores the result in mbchar. The object pointed to by
153da6c28aaSamw  * mbchar must be large enough to accommodate the multibyte character.
154da6c28aaSamw  *
155da6c28aaSamw  * Returns the numberof bytes written to mbchar.
156*7d1ffc32SGordon Ross  * Note: handles null like any 1-byte char.
157da6c28aaSamw  */
158da6c28aaSamw int
smb_wctomb(char * mbchar,uint32_t wchar)159*7d1ffc32SGordon Ross smb_wctomb(char *mbchar, uint32_t wchar)
160da6c28aaSamw {
161*7d1ffc32SGordon Ross 	char junk[MTS_MB_CUR_MAX+1];
162*7d1ffc32SGordon Ross 	size_t mblen;
163*7d1ffc32SGordon Ross 	size_t wclen;
164*7d1ffc32SGordon Ross 	int err;
165da6c28aaSamw 
166*7d1ffc32SGordon Ross 	if (mbchar == NULL)
167*7d1ffc32SGordon Ross 		mbchar = junk;
168da6c28aaSamw 
169*7d1ffc32SGordon Ross 	mblen = MTS_MB_CUR_MAX;
170*7d1ffc32SGordon Ross 	wclen = 1;
171*7d1ffc32SGordon Ross 	err = uconv_u32tou8(&wchar, &wclen, (uchar_t *)mbchar, &mblen,
172*7d1ffc32SGordon Ross 	    UCONV_IN_SYSTEM_ENDIAN | UCONV_IGNORE_NULL);
173*7d1ffc32SGordon Ross 	if (err != 0)
174*7d1ffc32SGordon Ross 		return (-1);
175*7d1ffc32SGordon Ross 
176*7d1ffc32SGordon Ross 	return ((int)mblen);
177da6c28aaSamw }
178da6c28aaSamw 
179da6c28aaSamw 
180da6c28aaSamw /*
181da6c28aaSamw  * wcstombs
182da6c28aaSamw  *
183da6c28aaSamw  * The wcstombs() function converts a wide character string wcstring
184da6c28aaSamw  * into a multibyte character string mbstring. Up to nbytes bytes are
185da6c28aaSamw  * stored in mbstring. Partial multibyte characters at the end of the
186da6c28aaSamw  * string are not stored. The multibyte character string is null
187da6c28aaSamw  * terminated if there is room.
188da6c28aaSamw  *
189da6c28aaSamw  * Returns the number of bytes converted, not counting the terminating
190*7d1ffc32SGordon Ross  * null byte. Returns -1 if an invalid WC sequence is encountered.
191da6c28aaSamw  */
192da6c28aaSamw size_t
smb_wcstombs(char * mbs,const smb_wchar_t * wcs,size_t nbytes)193*7d1ffc32SGordon Ross smb_wcstombs(char *mbs, const smb_wchar_t *wcs, size_t nbytes)
194da6c28aaSamw {
195*7d1ffc32SGordon Ross 	size_t mbslen, wcslen;
196*7d1ffc32SGordon Ross 	int err;
197da6c28aaSamw 
198*7d1ffc32SGordon Ross 	/* NULL or empty input is allowed. */
199*7d1ffc32SGordon Ross 	if (wcs == NULL || *wcs == 0) {
200*7d1ffc32SGordon Ross 		if (mbs != NULL && nbytes > 0)
201*7d1ffc32SGordon Ross 			*mbs = '\0';
202da6c28aaSamw 		return (0);
203da6c28aaSamw 	}
204da6c28aaSamw 
205*7d1ffc32SGordon Ross 	/*
206*7d1ffc32SGordon Ross 	 * Traditional wcstombs(3C) allows mbs==NULL to get the length.
207*7d1ffc32SGordon Ross 	 * SMB never calls it that way, but let's future-proof.
208*7d1ffc32SGordon Ross 	 */
209*7d1ffc32SGordon Ross 	if (mbs == NULL) {
210*7d1ffc32SGordon Ross 		return ((size_t)-1);
211da6c28aaSamw 	}
212da6c28aaSamw 
213*7d1ffc32SGordon Ross 	/*
214*7d1ffc32SGordon Ross 	 * Compute wcslen
215*7d1ffc32SGordon Ross 	 */
216*7d1ffc32SGordon Ross 	wcslen = 0;
217*7d1ffc32SGordon Ross 	while (wcs[wcslen] != 0)
218*7d1ffc32SGordon Ross 		wcslen++;
219da6c28aaSamw 
220*7d1ffc32SGordon Ross 	mbslen = nbytes;
221*7d1ffc32SGordon Ross 	err = uconv_u16tou8(wcs, &wcslen,
222*7d1ffc32SGordon Ross 	    (uchar_t *)mbs, &mbslen, UCONV_IN_LITTLE_ENDIAN);
223*7d1ffc32SGordon Ross 	if (err != 0)
224*7d1ffc32SGordon Ross 		return ((size_t)-1);
225*7d1ffc32SGordon Ross 
226*7d1ffc32SGordon Ross 	if (mbslen < nbytes)
227*7d1ffc32SGordon Ross 		mbs[mbslen] = '\0';
228*7d1ffc32SGordon Ross 
229*7d1ffc32SGordon Ross 	return (mbslen);
230da6c28aaSamw }
231da6c28aaSamw 
232da6c28aaSamw 
233da6c28aaSamw /*
234da6c28aaSamw  * Returns the number of bytes that would be written if the multi-
235da6c28aaSamw  * byte string mbs was converted to a wide character string, not
236da6c28aaSamw  * counting the terminating null wide character.
237da6c28aaSamw  */
238da6c28aaSamw size_t
smb_wcequiv_strlen(const char * mbs)239bbf6f00cSJordan Brown smb_wcequiv_strlen(const char *mbs)
240da6c28aaSamw {
241*7d1ffc32SGordon Ross 	uint32_t	wide_char;
242da6c28aaSamw 	size_t bytes;
243da6c28aaSamw 	size_t len = 0;
244da6c28aaSamw 
245da6c28aaSamw 	while (*mbs) {
246bbf6f00cSJordan Brown 		bytes = smb_mbtowc(&wide_char, mbs, MTS_MB_CHAR_MAX);
247da6c28aaSamw 		if (bytes == ((size_t)-1))
248da6c28aaSamw 			return ((size_t)-1);
249*7d1ffc32SGordon Ross 		mbs += bytes;
250da6c28aaSamw 
251bbf6f00cSJordan Brown 		len += sizeof (smb_wchar_t);
252*7d1ffc32SGordon Ross 		if (bytes > 3) {
253*7d1ffc32SGordon Ross 			/*
254*7d1ffc32SGordon Ross 			 * Extended unicode, so TWO smb_wchar_t
255*7d1ffc32SGordon Ross 			 */
256*7d1ffc32SGordon Ross 			len += sizeof (smb_wchar_t);
257*7d1ffc32SGordon Ross 		}
258da6c28aaSamw 	}
259da6c28aaSamw 
260da6c28aaSamw 	return (len);
261da6c28aaSamw }
262da6c28aaSamw 
263da6c28aaSamw 
264da6c28aaSamw /*
265da6c28aaSamw  * Returns the number of bytes that would be written if the multi-
26607a6ae61SGordon Ross  * byte string mbs was converted to an OEM character string,
267*7d1ffc32SGordon Ross  * (smb_mbstooem) not counting the terminating null character.
268da6c28aaSamw  */
269da6c28aaSamw size_t
smb_sbequiv_strlen(const char * mbs)270bbf6f00cSJordan Brown smb_sbequiv_strlen(const char *mbs)
271da6c28aaSamw {
272da6c28aaSamw 	size_t nbytes;
273da6c28aaSamw 	size_t len = 0;
274da6c28aaSamw 
275da6c28aaSamw 	while (*mbs) {
276*7d1ffc32SGordon Ross 		nbytes = smb_mbtowc(NULL, mbs, MTS_MB_CHAR_MAX);
277da6c28aaSamw 		if (nbytes == ((size_t)-1))
278da6c28aaSamw 			return ((size_t)-1);
279*7d1ffc32SGordon Ross 		if (nbytes == 0)
280*7d1ffc32SGordon Ross 			break;
281da6c28aaSamw 
282*7d1ffc32SGordon Ross 		if (nbytes == 1) {
283*7d1ffc32SGordon Ross 			/* ASCII */
284*7d1ffc32SGordon Ross 			len++;
285*7d1ffc32SGordon Ross 		} else if (nbytes < 8) {
286*7d1ffc32SGordon Ross 			/* Compute OEM length */
287*7d1ffc32SGordon Ross 			char mbsbuf[8];
288*7d1ffc32SGordon Ross 			uint8_t oembuf[8];
289*7d1ffc32SGordon Ross 			int oemlen;
290*7d1ffc32SGordon Ross 			(void) strlcpy(mbsbuf, mbs, nbytes+1);
291*7d1ffc32SGordon Ross 			oemlen = smb_mbstooem(oembuf, mbsbuf, 8);
292*7d1ffc32SGordon Ross 			if (oemlen < 0)
293*7d1ffc32SGordon Ross 				return ((size_t)-1);
294*7d1ffc32SGordon Ross 			len += oemlen;
295*7d1ffc32SGordon Ross 		} else {
296*7d1ffc32SGordon Ross 			return ((size_t)-1);
297*7d1ffc32SGordon Ross 		}
298da6c28aaSamw 
299da6c28aaSamw 		mbs += nbytes;
300da6c28aaSamw 	}
301da6c28aaSamw 
302da6c28aaSamw 	return (len);
303da6c28aaSamw }
304da6c28aaSamw 
/*
 * OEM <-> internal (UTF-8) string conversion.
 *
 * Modern SMB clients use Unicode (UTF-16), so OEM strings are
 * rarely seen.  Where they do still appear they are normally plain
 * ASCII (tag names etc.), and the converters below short-cut that
 * case with a straight copy.  Only non-ASCII input goes through
 * iconv.
 *
 * Real OEM code page support would need the code page to come from
 * a configuration value.  Until then it is always CP850, the name
 * held in this buffer and passed to the iconv open calls.
 * See also ./smb_oem.c
 */
static char smb_oem_codepage[32] = "CP850";
320da6c28aaSamw 
321da6c28aaSamw /*
32207a6ae61SGordon Ross  * smb_oemtombs
323da6c28aaSamw  *
32407a6ae61SGordon Ross  * Convert a null terminated OEM string 'string' to a UTF-8 string
32507a6ae61SGordon Ross  * no longer than max_mblen (null terminated if space).
326da6c28aaSamw  *
32707a6ae61SGordon Ross  * If the input string contains invalid OEM characters, a value
32807a6ae61SGordon Ross  * of -1 will be returned. Otherwise returns the length of 'mbs',
32907a6ae61SGordon Ross  * excluding the terminating null character.
330da6c28aaSamw  *
331da6c28aaSamw  * If either mbstring or string is a null pointer, -1 is returned.
332da6c28aaSamw  */
333da6c28aaSamw int
smb_oemtombs(char * mbs,const uint8_t * oems,int max_mblen)33407a6ae61SGordon Ross smb_oemtombs(char *mbs, const uint8_t *oems, int max_mblen)
335da6c28aaSamw {
33607a6ae61SGordon Ross 	uchar_t *p;
33707a6ae61SGordon Ross 	int	oemlen;
33807a6ae61SGordon Ross 	int	rlen;
33907a6ae61SGordon Ross 	boolean_t need_iconv = B_FALSE;
340da6c28aaSamw 
34107a6ae61SGordon Ross 	if (mbs == NULL || oems == NULL)
342da6c28aaSamw 		return (-1);
343da6c28aaSamw 
34407a6ae61SGordon Ross 	/*
34507a6ae61SGordon Ross 	 * Check if the oems is all ASCII (and get the length
34607a6ae61SGordon Ross 	 * while we're at it) so we know if we need to iconv.
34707a6ae61SGordon Ross 	 * We usually can avoid the iconv calls.
34807a6ae61SGordon Ross 	 */
34907a6ae61SGordon Ross 	oemlen = 0;
35007a6ae61SGordon Ross 	p = (uchar_t *)oems;
35107a6ae61SGordon Ross 	while (*p != '\0') {
35207a6ae61SGordon Ross 		oemlen++;
35307a6ae61SGordon Ross 		if (*p & 0x80)
35407a6ae61SGordon Ross 			need_iconv = B_TRUE;
35507a6ae61SGordon Ross 		p++;
356da6c28aaSamw 	}
357da6c28aaSamw 
35807a6ae61SGordon Ross 	if (need_iconv) {
35907a6ae61SGordon Ross 		int	rc;
36007a6ae61SGordon Ross 		char	*obuf = mbs;
36107a6ae61SGordon Ross 		size_t	olen = max_mblen;
36207a6ae61SGordon Ross 		size_t	ilen = oemlen;
36307a6ae61SGordon Ross #if defined(_KERNEL) || defined(_FAKE_KERNEL)
36407a6ae61SGordon Ross 		char *ibuf = (char *)oems;
36507a6ae61SGordon Ross 		kiconv_t ic;
36607a6ae61SGordon Ross 		int	err;
367da6c28aaSamw 
36807a6ae61SGordon Ross 		ic = kiconv_open("UTF-8", smb_oem_codepage);
36907a6ae61SGordon Ross 		if (ic == (kiconv_t)-1)
37007a6ae61SGordon Ross 			goto just_copy;
37107a6ae61SGordon Ross 		rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
37207a6ae61SGordon Ross 		(void) kiconv_close(ic);
37307a6ae61SGordon Ross #else	/* _KERNEL || _FAKE_KERNEL */
37407a6ae61SGordon Ross 		const char *ibuf = (char *)oems;
37507a6ae61SGordon Ross 		iconv_t	ic;
37607a6ae61SGordon Ross 		ic = iconv_open("UTF-8", smb_oem_codepage);
37707a6ae61SGordon Ross 		if (ic == (iconv_t)-1)
37807a6ae61SGordon Ross 			goto just_copy;
37907a6ae61SGordon Ross 		rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
38007a6ae61SGordon Ross 		(void) iconv_close(ic);
38107a6ae61SGordon Ross #endif	/* _KERNEL || _FAKE_KERNEL */
38207a6ae61SGordon Ross 		if (rc < 0)
38307a6ae61SGordon Ross 			return (-1);
38407a6ae61SGordon Ross 		/* Return val. is output bytes. */
38507a6ae61SGordon Ross 		rlen = (max_mblen - olen);
38607a6ae61SGordon Ross 	} else {
38707a6ae61SGordon Ross 	just_copy:
38807a6ae61SGordon Ross 		rlen = oemlen;
38907a6ae61SGordon Ross 		if (rlen > max_mblen)
39007a6ae61SGordon Ross 			rlen = max_mblen;
39107a6ae61SGordon Ross 		bcopy(oems, mbs, rlen);
392da6c28aaSamw 	}
39307a6ae61SGordon Ross 	if (rlen < max_mblen)
39407a6ae61SGordon Ross 		mbs[rlen] = '\0';
395da6c28aaSamw 
39607a6ae61SGordon Ross 	return (rlen);
39707a6ae61SGordon Ross }
398da6c28aaSamw 
399da6c28aaSamw /*
40007a6ae61SGordon Ross  * smb_mbstooem
401da6c28aaSamw  *
40207a6ae61SGordon Ross  * Convert a null terminated multi-byte string 'mbs' to an OEM string
40307a6ae61SGordon Ross  * no longer than max_oemlen (null terminated if space).
404da6c28aaSamw  *
40507a6ae61SGordon Ross  * If the input string contains invalid multi-byte characters, a value
40607a6ae61SGordon Ross  * of -1 will be returned. Otherwise returns the length of 'oems',
40707a6ae61SGordon Ross  * excluding the terminating null character.
408da6c28aaSamw  *
409da6c28aaSamw  * If either mbstring or string is a null pointer, -1 is returned.
410da6c28aaSamw  */
411da6c28aaSamw int
smb_mbstooem(uint8_t * oems,const char * mbs,int max_oemlen)41207a6ae61SGordon Ross smb_mbstooem(uint8_t *oems, const char *mbs, int max_oemlen)
413da6c28aaSamw {
41407a6ae61SGordon Ross 	uchar_t *p;
41507a6ae61SGordon Ross 	int	mbslen;
41607a6ae61SGordon Ross 	int	rlen;
41707a6ae61SGordon Ross 	boolean_t need_iconv = B_FALSE;
418da6c28aaSamw 
41907a6ae61SGordon Ross 	if (oems == NULL || mbs == NULL)
420da6c28aaSamw 		return (-1);
421da6c28aaSamw 
42207a6ae61SGordon Ross 	/*
42307a6ae61SGordon Ross 	 * Check if the mbs is all ASCII (and get the length
42407a6ae61SGordon Ross 	 * while we're at it) so we know if we need to iconv.
42507a6ae61SGordon Ross 	 * We usually can avoid the iconv calls.
42607a6ae61SGordon Ross 	 */
42707a6ae61SGordon Ross 	mbslen = 0;
42807a6ae61SGordon Ross 	p = (uchar_t *)mbs;
42907a6ae61SGordon Ross 	while (*p != '\0') {
43007a6ae61SGordon Ross 		mbslen++;
43107a6ae61SGordon Ross 		if (*p & 0x80)
43207a6ae61SGordon Ross 			need_iconv = B_TRUE;
43307a6ae61SGordon Ross 		p++;
43407a6ae61SGordon Ross 	}
43507a6ae61SGordon Ross 
43607a6ae61SGordon Ross 	if (need_iconv) {
43707a6ae61SGordon Ross 		int	rc;
43807a6ae61SGordon Ross 		char	*obuf = (char *)oems;
43907a6ae61SGordon Ross 		size_t	olen = max_oemlen;
44007a6ae61SGordon Ross 		size_t	ilen = mbslen;
44107a6ae61SGordon Ross #if defined(_KERNEL) || defined(_FAKE_KERNEL)
44207a6ae61SGordon Ross 		char *ibuf = (char *)mbs;
44307a6ae61SGordon Ross 		kiconv_t ic;
44407a6ae61SGordon Ross 		int	err;
44507a6ae61SGordon Ross 
44607a6ae61SGordon Ross 		ic = kiconv_open(smb_oem_codepage, "UTF-8");
44707a6ae61SGordon Ross 		if (ic == (kiconv_t)-1)
44807a6ae61SGordon Ross 			goto just_copy;
44907a6ae61SGordon Ross 		rc = kiconv(ic, &ibuf, &ilen, &obuf, &olen, &err);
45007a6ae61SGordon Ross 		(void) kiconv_close(ic);
45107a6ae61SGordon Ross #else	/* _KERNEL || _FAKE_KERNEL */
45207a6ae61SGordon Ross 		const char *ibuf = mbs;
45307a6ae61SGordon Ross 		iconv_t	ic;
45407a6ae61SGordon Ross 		ic = iconv_open(smb_oem_codepage, "UTF-8");
45507a6ae61SGordon Ross 		if (ic == (iconv_t)-1)
45607a6ae61SGordon Ross 			goto just_copy;
45707a6ae61SGordon Ross 		rc = iconv(ic, &ibuf, &ilen, &obuf, &olen);
45807a6ae61SGordon Ross 		(void) iconv_close(ic);
45907a6ae61SGordon Ross #endif	/* _KERNEL || _FAKE_KERNEL */
46007a6ae61SGordon Ross 		if (rc < 0)
461da6c28aaSamw 			return (-1);
46207a6ae61SGordon Ross 		/* Return val. is output bytes. */
46307a6ae61SGordon Ross 		rlen = (max_oemlen - olen);
46407a6ae61SGordon Ross 	} else {
46507a6ae61SGordon Ross 	just_copy:
46607a6ae61SGordon Ross 		rlen = mbslen;
46707a6ae61SGordon Ross 		if (rlen > max_oemlen)
46807a6ae61SGordon Ross 			rlen = max_oemlen;
46907a6ae61SGordon Ross 		bcopy(mbs, oems, rlen);
470da6c28aaSamw 	}
47107a6ae61SGordon Ross 	if (rlen < max_oemlen)
47207a6ae61SGordon Ross 		oems[rlen] = '\0';
473da6c28aaSamw 
47407a6ae61SGordon Ross 	return (rlen);
475da6c28aaSamw }
476