xref: /illumos-gate/usr/src/lib/libslp/javalib/com/sun/slp/IANACharCode.java (revision 4de2612967d06c4fdbf524a62556a1e8118a006f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * ident	"%Z%%M%	%I%	%E% SMI"
24  *
25  * Copyright (c) 2001 by Sun Microsystems, Inc.
26  * All rights reserved.
27  *
28  */
29 
30 //  SCCS Status:      %W%	%G%
31 //  IANACharCode.java: SLPv1 Character encoding support
32 //  Author:           James Kempf
33 //  Created On:       Fri Sep 11 13:24:02 1998
34 //  Last Modified By: James Kempf
35 //  Last Modified On: Wed Oct 28 14:33:02 1998
36 //  Update Count:     7
37 //
38 
39 
40 package com.sun.slp;
41 
42 import java.util.*;
43 import java.io.*;
44 
45 /**
46  * The IANACharCode class supports static methods for decoding IANA
47  * character codes into strings appropriate for the Java Writer subclass
48  * encoding String arguments, and for encoding the String descriptions
49  * of character codings into the integer codes. Ideally, Java itself
50  * should support this.
51  *
52  * @version %R%.%L% %D%
53  * @author James Kempf
54  */
55 
56 abstract class IANACharCode extends Object {
57 
58     // Character code descriptors. These can be used with the Java
59     //  character encoding utilities. For Unicode, we use little on
60     //  input,
61 
62     static final String ASCII = "Default";
63     static final String LATIN1 = "latin1";
64     static final String UTF8 = "UTF8";
65     static final String UNICODE = "Unicode";
66     static final String UNICODE_LITTLE = "UnicodeLittle";
67     static final String UNICODE_BIG = "UnicodeBig";
68     static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr";
69 
70     // Error code for misidentified character set.
71 
72     static final short CHARSET_NOT_UNDERSTOOD = 5;
73 
74     // Character codes.
75 
76     protected static final int CHAR_ASCII   = 3;
77     protected static final int CHAR_LATIN1  = 4;
78     protected static final int CHAR_UTF8    = 6;
79     protected static final int CHAR_UNICODE = 1000;
80 
81     // First two bytes indicate that string is big/little endian Unicode.
82     //  If this flag isn't set, then big endian is assumed and we
83     //  must add the big endian bytes on every call.
84 
85     protected static final byte[] UNICODE_LITTLE_FLAG =
86 					{(byte)0xFF, (byte)0xFE};
87 
88     protected static final byte[] UNICODE_BIG_FLAG =
89 					{(byte)0xFE, (byte)0xFF};
90 
91     /**
92      * Encode the String describing a character encoding into
93      * the approprate integer descriptor code.
94      *
95      * @param encoding The String describing the encoding.
96      * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
97      *			String is not recognized.
98      */
99 
100     static int encodeCharacterEncoding(String encoding)
101 	throws ServiceLocationException {
102 
103 	if (encoding.equals(ASCII)) {
104 	    return CHAR_ASCII;
105 	} else if (encoding.equals(LATIN1)) {
106 	    return CHAR_LATIN1;
107 	} else if (encoding.equals(UTF8)) {
108 	    return CHAR_UTF8;
109 	} else if (encoding.equals(UNICODE)) {
110 	    return CHAR_UNICODE;
111 	} else if (encoding.equals(UNICODE_BIG)) {
112 	    return CHAR_UNICODE;
113 	} else if (encoding.equals(UNICODE_LITTLE)) {
114 	    return CHAR_UNICODE;
115 	} else if (encoding.equals(UNICODE_BIG_NO_HDR)) {
116 	    return CHAR_UNICODE;
117 	}
118 
119 	throw
120 	    new ServiceLocationException(
121 				CHARSET_NOT_UNDERSTOOD,
122 				"v1_unsupported_encoding",
123 				new Object[] {encoding});
124     }
125 
126     /**
127      * Decode the integer describing a character encoding into
128      * the approprate String descriptor.
129      *
130      * @param code The integer coding the String set.
131      * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
132      *			integer is not recognized.
133      */
134 
135     static String decodeCharacterEncoding(int code)
136 	throws ServiceLocationException {
137 
138 	switch (code) {
139 	case CHAR_ASCII: 	return ASCII;
140 	case CHAR_LATIN1:	return LATIN1;
141 	case CHAR_UTF8:	return UTF8;
142 	case CHAR_UNICODE:	return UNICODE;
143 	}
144 
145 	throw
146 	    new ServiceLocationException(
147 				CHARSET_NOT_UNDERSTOOD,
148 				"v1_unsupported_encoding",
149 				new Object[] {Integer.toString(code)});
150     }
151 
152     /**
153      * Return a string of integers giving the character's encoding in
154      * the character set passed in as encoding.
155      *
156      * @param c The character to escape.
157      * @param encoding The character set encoding to use.
158      * @return The character as a string of integers for the encoding.
159      * @exception ServiceLocationException Thrown if the encoding is not
160      *		 recognized, if the character's encoding
161      *		 has more than 8 bytes or if the sign bit gets turned on.
162      */
163 
164     static String escapeChar(char c, String encoding)
165 	throws ServiceLocationException {
166 
167 	ByteArrayOutputStream baos = new ByteArrayOutputStream();
168 
169 	try {
170 	    OutputStreamWriter osw = new OutputStreamWriter(baos, encoding);
171 
172 	    osw.write(c);
173 	    osw.flush();
174 
175 	} catch (UnsupportedEncodingException ex) {
176 
177 	    throw
178 		new ServiceLocationException(
179 				CHARSET_NOT_UNDERSTOOD,
180 				"v1_unsupported_encoding",
181 				new Object[] {encoding});
182 
183 	} catch (IOException ex) {
184 
185 	}
186 
187 	byte b[] = baos.toByteArray();
188 	int code = 0;
189 
190 	// Assemble the character code based on the encoding type.
191 
192 	if (encoding.equals(UNICODE) ||
193 	    encoding.equals(UNICODE_BIG) ||
194 	    encoding.equals(UNICODE_LITTLE)) {
195 
196 	    code = (int)(b[0] & 0xFF);		// control bytes...
197 	    code = (int)(code | ((b[1] & 0xFF) << 8));
198 	    code = (int)(code | ((b[2] & 0xFF) << 16));
199 	    code = (int)(code | ((b[3] & 0xFF) << 24));
200 
201 	    if (b.length <= 4) {
202 		throw
203 		    new ServiceLocationException(
204 				ServiceLocationException.PARSE_ERROR,
205 				"v1_charcode_error",
206 				new Object[] {new Character(c), encoding});
207 	    }
208 
209 	} else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) {
210 
211 	    code = (int)(b[0] & 0xFF);
212 
213 	    if (b.length > 1) {
214 		throw
215 		    new ServiceLocationException(
216 				ServiceLocationException.PARSE_ERROR,
217 				"v1_charcode_error",
218 				new Object[] {new Character(c), encoding});
219 	    }
220 	} else if (encoding.equals(UTF8)) {
221 
222 	    if (b.length > 3) {
223 		throw
224 		    new ServiceLocationException(
225 				ServiceLocationException.PARSE_ERROR,
226 				"v1_charcode_error",
227 				new Object[] {new Character(c), encoding});
228 	    }
229 
230 
231 	    code = (int)(b[0] & 0xFF);
232 
233 	    if (b.length > 1) {
234 		code = (int)(code | ((b[1] & 0xFF) << 8));
235 	    }
236 
237 	    if (b.length > 2) {
238 		code = (int)(code | ((b[2] & 0xFF) << 16));
239 	    }
240 	}
241 
242 	return Integer.toString(code);
243     }
244 
245     /**
246      * Unescape the character encoded as the string.
247      *
248      * @param ch The character as a string of Integers.
249      * @param encoding The character set encoding to use.
250      * @return The character.
251      * @exception ServiceLocationException Thrown if the string can't
252      *		 be parsed into an integer or if the encoding isn't
253      *		 recognized.
254      */
255 
256     static String unescapeChar(String ch, String encoding)
257 	throws ServiceLocationException {
258 
259 	int code = 0;
260 
261 	try {
262 	    code = Integer.parseInt(ch);
263 
264 	} catch (NumberFormatException ex) {
265 	    throw
266 		new ServiceLocationException(
267 				ServiceLocationException.PARSE_ERROR,
268 				"v1_stringcode_error",
269 				new Object[] {ch, encoding});
270 
271 	}
272 
273 	// Convert to bytes. We need to taylor the array size to the
274 	//  number of bytes because otherwise, in encodings that
275 	//  take less bytes, the resulting string will have garbage
276 	//  in it.
277 
278 	String str = null;
279 	byte b0 = 0, b1 = 0, b2 = 0, b3 = 0;
280 	byte b[] = null;
281 
282 	b0 = (byte) (code & 0xFF);
283 	b1 = (byte) ((code >> 8) & 0xFF);
284 	b2 = (byte) ((code >> 16) & 0xFF);
285 	b3 = (byte) ((code >> 24) & 0xFf);
286 
287 	// We create an array sized to the encoding.
288 
289 	if (encoding.equals(UNICODE_BIG) ||
290 	    encoding.equals(UNICODE_LITTLE)) {
291 	    b = new byte[4];
292 	    b[0] = b0;
293 	    b[1] = b1;
294 	    b[2] = b2;
295 	    b[3] = b3;
296 
297 	} else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) {
298 	    // single byte
299 	    b = new byte[1];
300 	    b[0] = b0;
301 
302 	    if (b1 != 0 || b2 != 0) {
303 		throw
304 		    new ServiceLocationException(
305 				ServiceLocationException.PARSE_ERROR,
306 				"v1_stringcode_error",
307 				new Object[] {ch, encoding});
308 	    }
309 
310 
311 	} else if (encoding.equals(UTF8)) {// vari-byte
312 
313 	    if (b3 != 0) {
314 		throw
315 		    new ServiceLocationException(
316 				ServiceLocationException.PARSE_ERROR,
317 				"v1_stringcode_error",
318 				new Object[] {ch, encoding});
319 	    }
320 
321 	    if (b2 != 0) {
322 		b = new byte[3];
323 		b[2] = b2;
324 		b[1] = b1;
325 		b[0] = b0;
326 	    } else if (b1 != 0) {
327 		b = new byte[2];
328 		b[1] = b1;
329 		b[0] = b0;
330 	    } else {
331 		b = new byte[1];
332 		b[0] = b0;
333 	    }
334 	}
335 
336 	// Make a string out of it.
337 
338 	try {
339 	    str = new String(b, encoding);
340 
341 	} catch (UnsupportedEncodingException ex) {
342 	    Assert.slpassert(false,
343 			  "v1_unsupported_encoding",
344 			  new Object[] {encoding});
345 	}
346 
347 	return str;
348     }
349 
350     // Determine from the flag bytes whether this is big or little endian
351     //  Unicode. If there are no flag bytes, then just return UNICODE.
352 
353     static String getUnicodeEndianess(byte[] bytes) {
354 
355 	if (bytes.length >= 2) {
356 
357 	    if (bytes[0] == UNICODE_LITTLE_FLAG[0] &&
358 		bytes[1] == UNICODE_LITTLE_FLAG[1]) {
359 		return UNICODE_LITTLE;
360 
361 	    } else if (bytes[0] == UNICODE_BIG_FLAG[0] &&
362 		       bytes[1] == UNICODE_BIG_FLAG[1]) {
363 		return UNICODE_BIG;
364 
365 	    }
366 	}
367 
368 	// We can`t tell from the byte header, so it's big endian. But
369 	//  since we need to add the byte header, we say we don't know.
370 
371 	return UNICODE;
372 
373     }
374 
375     // Add the big endian flag to a Unicode string.
376 
377     static byte[] addBigEndianFlag(byte[] bytes) {
378 
379 	byte[] flaggedBytes = new byte[bytes.length + 2];
380 
381 	flaggedBytes[0] = UNICODE_BIG_FLAG[0];
382 	flaggedBytes[1] = UNICODE_BIG_FLAG[1];
383 
384 	System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length);
385 
386 	return flaggedBytes;
387 
388     }
389 }
390