xref: /illumos-gate/usr/src/lib/libslp/javalib/com/sun/slp/IANACharCode.java (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright (c) 2001 by Sun Microsystems, Inc.
23   * All rights reserved.
24   *
25   */
26  
27  //  IANACharCode.java: SLPv1 Character encoding support
28  //  Author:           James Kempf
29  //  Created On:       Fri Sep 11 13:24:02 1998
30  //  Last Modified By: James Kempf
31  //  Last Modified On: Wed Oct 28 14:33:02 1998
32  //  Update Count:     7
33  //
34  
35  
36  package com.sun.slp;
37  
38  import java.util.*;
39  import java.io.*;
40  
41  /**
42   * The IANACharCode class supports static methods for decoding IANA
43   * character codes into strings appropriate for the Java Writer subclass
44   * encoding String arguments, and for encoding the String descriptions
45   * of character codings into the integer codes. Ideally, Java itself
46   * should support this.
47   *
48   * @author James Kempf
49   */
50  
51  abstract class IANACharCode extends Object {
52  
53      // Character code descriptors. These can be used with the Java
54      //  character encoding utilities. For Unicode, we use little on
55      //  input,
56  
57      static final String ASCII = "Default";
58      static final String LATIN1 = "latin1";
59      static final String UTF8 = "UTF8";
60      static final String UNICODE = "Unicode";
61      static final String UNICODE_LITTLE = "UnicodeLittle";
62      static final String UNICODE_BIG = "UnicodeBig";
63      static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr";
64  
65      // Error code for misidentified character set.
66  
67      static final short CHARSET_NOT_UNDERSTOOD = 5;
68  
69      // Character codes.
70  
71      protected static final int CHAR_ASCII   = 3;
72      protected static final int CHAR_LATIN1  = 4;
73      protected static final int CHAR_UTF8    = 6;
74      protected static final int CHAR_UNICODE = 1000;
75  
76      // First two bytes indicate that string is big/little endian Unicode.
77      //  If this flag isn't set, then big endian is assumed and we
78      //  must add the big endian bytes on every call.
79  
80      protected static final byte[] UNICODE_LITTLE_FLAG =
81  					{(byte)0xFF, (byte)0xFE};
82  
83      protected static final byte[] UNICODE_BIG_FLAG =
84  					{(byte)0xFE, (byte)0xFF};
85  
86      /**
87       * Encode the String describing a character encoding into
88       * the approprate integer descriptor code.
89       *
90       * @param encoding The String describing the encoding.
91       * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
92       *			String is not recognized.
93       */
94  
encodeCharacterEncoding(String encoding)95      static int encodeCharacterEncoding(String encoding)
96  	throws ServiceLocationException {
97  
98  	if (encoding.equals(ASCII)) {
99  	    return CHAR_ASCII;
100  	} else if (encoding.equals(LATIN1)) {
101  	    return CHAR_LATIN1;
102  	} else if (encoding.equals(UTF8)) {
103  	    return CHAR_UTF8;
104  	} else if (encoding.equals(UNICODE)) {
105  	    return CHAR_UNICODE;
106  	} else if (encoding.equals(UNICODE_BIG)) {
107  	    return CHAR_UNICODE;
108  	} else if (encoding.equals(UNICODE_LITTLE)) {
109  	    return CHAR_UNICODE;
110  	} else if (encoding.equals(UNICODE_BIG_NO_HDR)) {
111  	    return CHAR_UNICODE;
112  	}
113  
114  	throw
115  	    new ServiceLocationException(
116  				CHARSET_NOT_UNDERSTOOD,
117  				"v1_unsupported_encoding",
118  				new Object[] {encoding});
119      }
120  
121      /**
122       * Decode the integer describing a character encoding into
123       * the approprate String descriptor.
124       *
125       * @param code The integer coding the String set.
126       * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
127       *			integer is not recognized.
128       */
129  
decodeCharacterEncoding(int code)130      static String decodeCharacterEncoding(int code)
131  	throws ServiceLocationException {
132  
133  	switch (code) {
134  	case CHAR_ASCII: 	return ASCII;
135  	case CHAR_LATIN1:	return LATIN1;
136  	case CHAR_UTF8:	return UTF8;
137  	case CHAR_UNICODE:	return UNICODE;
138  	}
139  
140  	throw
141  	    new ServiceLocationException(
142  				CHARSET_NOT_UNDERSTOOD,
143  				"v1_unsupported_encoding",
144  				new Object[] {Integer.toString(code)});
145      }
146  
147      /**
148       * Return a string of integers giving the character's encoding in
149       * the character set passed in as encoding.
150       *
151       * @param c The character to escape.
152       * @param encoding The character set encoding to use.
153       * @return The character as a string of integers for the encoding.
154       * @exception ServiceLocationException Thrown if the encoding is not
155       *		 recognized, if the character's encoding
156       *		 has more than 8 bytes or if the sign bit gets turned on.
157       */
158  
escapeChar(char c, String encoding)159      static String escapeChar(char c, String encoding)
160  	throws ServiceLocationException {
161  
162  	ByteArrayOutputStream baos = new ByteArrayOutputStream();
163  
164  	try {
165  	    OutputStreamWriter osw = new OutputStreamWriter(baos, encoding);
166  
167  	    osw.write(c);
168  	    osw.flush();
169  
170  	} catch (UnsupportedEncodingException ex) {
171  
172  	    throw
173  		new ServiceLocationException(
174  				CHARSET_NOT_UNDERSTOOD,
175  				"v1_unsupported_encoding",
176  				new Object[] {encoding});
177  
178  	} catch (IOException ex) {
179  
180  	}
181  
182  	byte b[] = baos.toByteArray();
183  	int code = 0;
184  
185  	// Assemble the character code based on the encoding type.
186  
187  	if (encoding.equals(UNICODE) ||
188  	    encoding.equals(UNICODE_BIG) ||
189  	    encoding.equals(UNICODE_LITTLE)) {
190  
191  	    code = (int)(b[0] & 0xFF);		// control bytes...
192  	    code = (int)(code | ((b[1] & 0xFF) << 8));
193  	    code = (int)(code | ((b[2] & 0xFF) << 16));
194  	    code = (int)(code | ((b[3] & 0xFF) << 24));
195  
196  	    if (b.length <= 4) {
197  		throw
198  		    new ServiceLocationException(
199  				ServiceLocationException.PARSE_ERROR,
200  				"v1_charcode_error",
201  				new Object[] {new Character(c), encoding});
202  	    }
203  
204  	} else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) {
205  
206  	    code = (int)(b[0] & 0xFF);
207  
208  	    if (b.length > 1) {
209  		throw
210  		    new ServiceLocationException(
211  				ServiceLocationException.PARSE_ERROR,
212  				"v1_charcode_error",
213  				new Object[] {new Character(c), encoding});
214  	    }
215  	} else if (encoding.equals(UTF8)) {
216  
217  	    if (b.length > 3) {
218  		throw
219  		    new ServiceLocationException(
220  				ServiceLocationException.PARSE_ERROR,
221  				"v1_charcode_error",
222  				new Object[] {new Character(c), encoding});
223  	    }
224  
225  
226  	    code = (int)(b[0] & 0xFF);
227  
228  	    if (b.length > 1) {
229  		code = (int)(code | ((b[1] & 0xFF) << 8));
230  	    }
231  
232  	    if (b.length > 2) {
233  		code = (int)(code | ((b[2] & 0xFF) << 16));
234  	    }
235  	}
236  
237  	return Integer.toString(code);
238      }
239  
240      /**
241       * Unescape the character encoded as the string.
242       *
243       * @param ch The character as a string of Integers.
244       * @param encoding The character set encoding to use.
245       * @return The character.
246       * @exception ServiceLocationException Thrown if the string can't
247       *		 be parsed into an integer or if the encoding isn't
248       *		 recognized.
249       */
250  
unescapeChar(String ch, String encoding)251      static String unescapeChar(String ch, String encoding)
252  	throws ServiceLocationException {
253  
254  	int code = 0;
255  
256  	try {
257  	    code = Integer.parseInt(ch);
258  
259  	} catch (NumberFormatException ex) {
260  	    throw
261  		new ServiceLocationException(
262  				ServiceLocationException.PARSE_ERROR,
263  				"v1_stringcode_error",
264  				new Object[] {ch, encoding});
265  
266  	}
267  
268  	// Convert to bytes. We need to taylor the array size to the
269  	//  number of bytes because otherwise, in encodings that
270  	//  take less bytes, the resulting string will have garbage
271  	//  in it.
272  
273  	String str = null;
274  	byte b0 = 0, b1 = 0, b2 = 0, b3 = 0;
275  	byte b[] = null;
276  
277  	b0 = (byte) (code & 0xFF);
278  	b1 = (byte) ((code >> 8) & 0xFF);
279  	b2 = (byte) ((code >> 16) & 0xFF);
280  	b3 = (byte) ((code >> 24) & 0xFf);
281  
282  	// We create an array sized to the encoding.
283  
284  	if (encoding.equals(UNICODE_BIG) ||
285  	    encoding.equals(UNICODE_LITTLE)) {
286  	    b = new byte[4];
287  	    b[0] = b0;
288  	    b[1] = b1;
289  	    b[2] = b2;
290  	    b[3] = b3;
291  
292  	} else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) {
293  	    // single byte
294  	    b = new byte[1];
295  	    b[0] = b0;
296  
297  	    if (b1 != 0 || b2 != 0) {
298  		throw
299  		    new ServiceLocationException(
300  				ServiceLocationException.PARSE_ERROR,
301  				"v1_stringcode_error",
302  				new Object[] {ch, encoding});
303  	    }
304  
305  
306  	} else if (encoding.equals(UTF8)) {// vari-byte
307  
308  	    if (b3 != 0) {
309  		throw
310  		    new ServiceLocationException(
311  				ServiceLocationException.PARSE_ERROR,
312  				"v1_stringcode_error",
313  				new Object[] {ch, encoding});
314  	    }
315  
316  	    if (b2 != 0) {
317  		b = new byte[3];
318  		b[2] = b2;
319  		b[1] = b1;
320  		b[0] = b0;
321  	    } else if (b1 != 0) {
322  		b = new byte[2];
323  		b[1] = b1;
324  		b[0] = b0;
325  	    } else {
326  		b = new byte[1];
327  		b[0] = b0;
328  	    }
329  	}
330  
331  	// Make a string out of it.
332  
333  	try {
334  	    str = new String(b, encoding);
335  
336  	} catch (UnsupportedEncodingException ex) {
337  	    Assert.slpassert(false,
338  			  "v1_unsupported_encoding",
339  			  new Object[] {encoding});
340  	}
341  
342  	return str;
343      }
344  
345      // Determine from the flag bytes whether this is big or little endian
346      //  Unicode. If there are no flag bytes, then just return UNICODE.
347  
getUnicodeEndianess(byte[] bytes)348      static String getUnicodeEndianess(byte[] bytes) {
349  
350  	if (bytes.length >= 2) {
351  
352  	    if (bytes[0] == UNICODE_LITTLE_FLAG[0] &&
353  		bytes[1] == UNICODE_LITTLE_FLAG[1]) {
354  		return UNICODE_LITTLE;
355  
356  	    } else if (bytes[0] == UNICODE_BIG_FLAG[0] &&
357  		       bytes[1] == UNICODE_BIG_FLAG[1]) {
358  		return UNICODE_BIG;
359  
360  	    }
361  	}
362  
363  	// We can`t tell from the byte header, so it's big endian. But
364  	//  since we need to add the byte header, we say we don't know.
365  
366  	return UNICODE;
367  
368      }
369  
370      // Add the big endian flag to a Unicode string.
371  
addBigEndianFlag(byte[] bytes)372      static byte[] addBigEndianFlag(byte[] bytes) {
373  
374  	byte[] flaggedBytes = new byte[bytes.length + 2];
375  
376  	flaggedBytes[0] = UNICODE_BIG_FLAG[0];
377  	flaggedBytes[1] = UNICODE_BIG_FLAG[1];
378  
379  	System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length);
380  
381  	return flaggedBytes;
382  
383      }
384  }
385