xref: /illumos-gate/usr/src/lib/libslp/javalib/com/sun/slp/IANACharCode.java (revision 07a48826732249fcd3aa8dd53c8389595e9f1fbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2001 by Sun Microsystems, Inc.
23  * All rights reserved.
24  *
25  */
26 
27 //  IANACharCode.java: SLPv1 Character encoding support
28 //  Author:           James Kempf
29 //  Created On:       Fri Sep 11 13:24:02 1998
30 //  Last Modified By: James Kempf
31 //  Last Modified On: Wed Oct 28 14:33:02 1998
32 //  Update Count:     7
33 //
34 
35 
36 package com.sun.slp;
37 
38 import java.util.*;
39 import java.io.*;
40 
41 /**
42  * The IANACharCode class supports static methods for decoding IANA
43  * character codes into strings appropriate for the Java Writer subclass
44  * encoding String arguments, and for encoding the String descriptions
45  * of character codings into the integer codes. Ideally, Java itself
46  * should support this.
47  *
48  * @author James Kempf
49  */
50 
51 abstract class IANACharCode extends Object {
52 
53     // Character code descriptors. These can be used with the Java
54     //  character encoding utilities. For Unicode, we use little on
55     //  input,
56 
57     static final String ASCII = "Default";
58     static final String LATIN1 = "latin1";
59     static final String UTF8 = "UTF8";
60     static final String UNICODE = "Unicode";
61     static final String UNICODE_LITTLE = "UnicodeLittle";
62     static final String UNICODE_BIG = "UnicodeBig";
63     static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr";
64 
65     // Error code for misidentified character set.
66 
67     static final short CHARSET_NOT_UNDERSTOOD = 5;
68 
69     // Character codes.
70 
71     protected static final int CHAR_ASCII   = 3;
72     protected static final int CHAR_LATIN1  = 4;
73     protected static final int CHAR_UTF8    = 6;
74     protected static final int CHAR_UNICODE = 1000;
75 
76     // First two bytes indicate that string is big/little endian Unicode.
77     //  If this flag isn't set, then big endian is assumed and we
78     //  must add the big endian bytes on every call.
79 
80     protected static final byte[] UNICODE_LITTLE_FLAG =
81 					{(byte)0xFF, (byte)0xFE};
82 
83     protected static final byte[] UNICODE_BIG_FLAG =
84 					{(byte)0xFE, (byte)0xFF};
85 
86     /**
87      * Encode the String describing a character encoding into
88      * the approprate integer descriptor code.
89      *
90      * @param encoding The String describing the encoding.
91      * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
92      *			String is not recognized.
93      */
94 
95     static int encodeCharacterEncoding(String encoding)
96 	throws ServiceLocationException {
97 
98 	if (encoding.equals(ASCII)) {
99 	    return CHAR_ASCII;
100 	} else if (encoding.equals(LATIN1)) {
101 	    return CHAR_LATIN1;
102 	} else if (encoding.equals(UTF8)) {
103 	    return CHAR_UTF8;
104 	} else if (encoding.equals(UNICODE)) {
105 	    return CHAR_UNICODE;
106 	} else if (encoding.equals(UNICODE_BIG)) {
107 	    return CHAR_UNICODE;
108 	} else if (encoding.equals(UNICODE_LITTLE)) {
109 	    return CHAR_UNICODE;
110 	} else if (encoding.equals(UNICODE_BIG_NO_HDR)) {
111 	    return CHAR_UNICODE;
112 	}
113 
114 	throw
115 	    new ServiceLocationException(
116 				CHARSET_NOT_UNDERSTOOD,
117 				"v1_unsupported_encoding",
118 				new Object[] {encoding});
119     }
120 
121     /**
122      * Decode the integer describing a character encoding into
123      * the approprate String descriptor.
124      *
125      * @param code The integer coding the String set.
126      * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
127      *			integer is not recognized.
128      */
129 
130     static String decodeCharacterEncoding(int code)
131 	throws ServiceLocationException {
132 
133 	switch (code) {
134 	case CHAR_ASCII: 	return ASCII;
135 	case CHAR_LATIN1:	return LATIN1;
136 	case CHAR_UTF8:	return UTF8;
137 	case CHAR_UNICODE:	return UNICODE;
138 	}
139 
140 	throw
141 	    new ServiceLocationException(
142 				CHARSET_NOT_UNDERSTOOD,
143 				"v1_unsupported_encoding",
144 				new Object[] {Integer.toString(code)});
145     }
146 
147     /**
148      * Return a string of integers giving the character's encoding in
149      * the character set passed in as encoding.
150      *
151      * @param c The character to escape.
152      * @param encoding The character set encoding to use.
153      * @return The character as a string of integers for the encoding.
154      * @exception ServiceLocationException Thrown if the encoding is not
155      *		 recognized, if the character's encoding
156      *		 has more than 8 bytes or if the sign bit gets turned on.
157      */
158 
159     static String escapeChar(char c, String encoding)
160 	throws ServiceLocationException {
161 
162 	ByteArrayOutputStream baos = new ByteArrayOutputStream();
163 
164 	try {
165 	    OutputStreamWriter osw = new OutputStreamWriter(baos, encoding);
166 
167 	    osw.write(c);
168 	    osw.flush();
169 
170 	} catch (UnsupportedEncodingException ex) {
171 
172 	    throw
173 		new ServiceLocationException(
174 				CHARSET_NOT_UNDERSTOOD,
175 				"v1_unsupported_encoding",
176 				new Object[] {encoding});
177 
178 	} catch (IOException ex) {
179 
180 	}
181 
182 	byte b[] = baos.toByteArray();
183 	int code = 0;
184 
185 	// Assemble the character code based on the encoding type.
186 
187 	if (encoding.equals(UNICODE) ||
188 	    encoding.equals(UNICODE_BIG) ||
189 	    encoding.equals(UNICODE_LITTLE)) {
190 
191 	    code = (int)(b[0] & 0xFF);		// control bytes...
192 	    code = (int)(code | ((b[1] & 0xFF) << 8));
193 	    code = (int)(code | ((b[2] & 0xFF) << 16));
194 	    code = (int)(code | ((b[3] & 0xFF) << 24));
195 
196 	    if (b.length <= 4) {
197 		throw
198 		    new ServiceLocationException(
199 				ServiceLocationException.PARSE_ERROR,
200 				"v1_charcode_error",
201 				new Object[] {new Character(c), encoding});
202 	    }
203 
204 	} else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) {
205 
206 	    code = (int)(b[0] & 0xFF);
207 
208 	    if (b.length > 1) {
209 		throw
210 		    new ServiceLocationException(
211 				ServiceLocationException.PARSE_ERROR,
212 				"v1_charcode_error",
213 				new Object[] {new Character(c), encoding});
214 	    }
215 	} else if (encoding.equals(UTF8)) {
216 
217 	    if (b.length > 3) {
218 		throw
219 		    new ServiceLocationException(
220 				ServiceLocationException.PARSE_ERROR,
221 				"v1_charcode_error",
222 				new Object[] {new Character(c), encoding});
223 	    }
224 
225 
226 	    code = (int)(b[0] & 0xFF);
227 
228 	    if (b.length > 1) {
229 		code = (int)(code | ((b[1] & 0xFF) << 8));
230 	    }
231 
232 	    if (b.length > 2) {
233 		code = (int)(code | ((b[2] & 0xFF) << 16));
234 	    }
235 	}
236 
237 	return Integer.toString(code);
238     }
239 
240     /**
241      * Unescape the character encoded as the string.
242      *
243      * @param ch The character as a string of Integers.
244      * @param encoding The character set encoding to use.
245      * @return The character.
246      * @exception ServiceLocationException Thrown if the string can't
247      *		 be parsed into an integer or if the encoding isn't
248      *		 recognized.
249      */
250 
251     static String unescapeChar(String ch, String encoding)
252 	throws ServiceLocationException {
253 
254 	int code = 0;
255 
256 	try {
257 	    code = Integer.parseInt(ch);
258 
259 	} catch (NumberFormatException ex) {
260 	    throw
261 		new ServiceLocationException(
262 				ServiceLocationException.PARSE_ERROR,
263 				"v1_stringcode_error",
264 				new Object[] {ch, encoding});
265 
266 	}
267 
268 	// Convert to bytes. We need to taylor the array size to the
269 	//  number of bytes because otherwise, in encodings that
270 	//  take less bytes, the resulting string will have garbage
271 	//  in it.
272 
273 	String str = null;
274 	byte b0 = 0, b1 = 0, b2 = 0, b3 = 0;
275 	byte b[] = null;
276 
277 	b0 = (byte) (code & 0xFF);
278 	b1 = (byte) ((code >> 8) & 0xFF);
279 	b2 = (byte) ((code >> 16) & 0xFF);
280 	b3 = (byte) ((code >> 24) & 0xFf);
281 
282 	// We create an array sized to the encoding.
283 
284 	if (encoding.equals(UNICODE_BIG) ||
285 	    encoding.equals(UNICODE_LITTLE)) {
286 	    b = new byte[4];
287 	    b[0] = b0;
288 	    b[1] = b1;
289 	    b[2] = b2;
290 	    b[3] = b3;
291 
292 	} else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) {
293 	    // single byte
294 	    b = new byte[1];
295 	    b[0] = b0;
296 
297 	    if (b1 != 0 || b2 != 0) {
298 		throw
299 		    new ServiceLocationException(
300 				ServiceLocationException.PARSE_ERROR,
301 				"v1_stringcode_error",
302 				new Object[] {ch, encoding});
303 	    }
304 
305 
306 	} else if (encoding.equals(UTF8)) {// vari-byte
307 
308 	    if (b3 != 0) {
309 		throw
310 		    new ServiceLocationException(
311 				ServiceLocationException.PARSE_ERROR,
312 				"v1_stringcode_error",
313 				new Object[] {ch, encoding});
314 	    }
315 
316 	    if (b2 != 0) {
317 		b = new byte[3];
318 		b[2] = b2;
319 		b[1] = b1;
320 		b[0] = b0;
321 	    } else if (b1 != 0) {
322 		b = new byte[2];
323 		b[1] = b1;
324 		b[0] = b0;
325 	    } else {
326 		b = new byte[1];
327 		b[0] = b0;
328 	    }
329 	}
330 
331 	// Make a string out of it.
332 
333 	try {
334 	    str = new String(b, encoding);
335 
336 	} catch (UnsupportedEncodingException ex) {
337 	    Assert.slpassert(false,
338 			  "v1_unsupported_encoding",
339 			  new Object[] {encoding});
340 	}
341 
342 	return str;
343     }
344 
345     // Determine from the flag bytes whether this is big or little endian
346     //  Unicode. If there are no flag bytes, then just return UNICODE.
347 
348     static String getUnicodeEndianess(byte[] bytes) {
349 
350 	if (bytes.length >= 2) {
351 
352 	    if (bytes[0] == UNICODE_LITTLE_FLAG[0] &&
353 		bytes[1] == UNICODE_LITTLE_FLAG[1]) {
354 		return UNICODE_LITTLE;
355 
356 	    } else if (bytes[0] == UNICODE_BIG_FLAG[0] &&
357 		       bytes[1] == UNICODE_BIG_FLAG[1]) {
358 		return UNICODE_BIG;
359 
360 	    }
361 	}
362 
363 	// We can`t tell from the byte header, so it's big endian. But
364 	//  since we need to add the byte header, we say we don't know.
365 
366 	return UNICODE;
367 
368     }
369 
370     // Add the big endian flag to a Unicode string.
371 
372     static byte[] addBigEndianFlag(byte[] bytes) {
373 
374 	byte[] flaggedBytes = new byte[bytes.length + 2];
375 
376 	flaggedBytes[0] = UNICODE_BIG_FLAG[0];
377 	flaggedBytes[1] = UNICODE_BIG_FLAG[1];
378 
379 	System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length);
380 
381 	return flaggedBytes;
382 
383     }
384 }
385