/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * ident "%Z%%M% %I% %E% SMI"
 *
 * Copyright (c) 2001 by Sun Microsystems, Inc.
 * All rights reserved.
 *
 */

// SCCS Status: %W% %G%
// IANACharCode.java: SLPv1 Character encoding support
// Author: James Kempf
// Created On: Fri Sep 11 13:24:02 1998
// Last Modified By: James Kempf
// Last Modified On: Wed Oct 28 14:33:02 1998
// Update Count: 7
//


package com.sun.slp;

import java.util.*;
import java.io.*;

/**
 * The IANACharCode class supports static methods for decoding IANA
 * character codes into strings appropriate for the Java Writer subclass
 * encoding String arguments, and for encoding the String descriptions
 * of character codings into the integer codes. Ideally, Java itself
 * should support this.
 *
 * @version %R%.%L% %D%
 * @author James Kempf
 */

abstract class IANACharCode extends Object {

    // Character code descriptors. These can be used with the Java
    // character encoding utilities.
    // NOTE(review): the original comment continued with the unfinished
    // sentence "For Unicode, we use little on input," -- intent unknown.

    static final String ASCII = "Default";
    static final String LATIN1 = "latin1";
    static final String UTF8 = "UTF8";
    static final String UNICODE = "Unicode";
    static final String UNICODE_LITTLE = "UnicodeLittle";
    static final String UNICODE_BIG = "UnicodeBig";
    static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr";

    // Error code for misidentified character set.

    static final short CHARSET_NOT_UNDERSTOOD = 5;

    // Character codes.

    protected static final int CHAR_ASCII = 3;
    protected static final int CHAR_LATIN1 = 4;
    protected static final int CHAR_UTF8 = 6;
    protected static final int CHAR_UNICODE = 1000;

    // First two bytes indicate that string is big/little endian Unicode.
    // If this flag isn't set, then big endian is assumed and we
    // must add the big endian bytes on every call.

    protected static final byte[] UNICODE_LITTLE_FLAG =
        {(byte)0xFF, (byte)0xFE};

    protected static final byte[] UNICODE_BIG_FLAG =
        {(byte)0xFE, (byte)0xFF};

    /**
     * Encode the String describing a character encoding into
     * the appropriate integer descriptor code. All four Unicode
     * descriptors map onto the single code CHAR_UNICODE.
     *
     * @param encoding The String describing the encoding.
     * @return The integer code for the encoding.
     * @exception ServiceLocationException Thrown with error code
     *			CHARSET_NOT_UNDERSTOOD if the String is not
     *			recognized.
     */

    static int encodeCharacterEncoding(String encoding)
        throws ServiceLocationException {

        if (encoding.equals(ASCII)) {
            return CHAR_ASCII;
        }

        if (encoding.equals(LATIN1)) {
            return CHAR_LATIN1;
        }

        if (encoding.equals(UTF8)) {
            return CHAR_UTF8;
        }

        if (encoding.equals(UNICODE) ||
            encoding.equals(UNICODE_BIG) ||
            encoding.equals(UNICODE_LITTLE) ||
            encoding.equals(UNICODE_BIG_NO_HDR)) {
            return CHAR_UNICODE;
        }

        throw unsupportedEncoding(encoding);
    }

    /**
     * Decode the integer describing a character encoding into
     * the appropriate String descriptor.
     *
     * @param code The integer coding the String set.
     * @return The String descriptor for the code.
     * @exception ServiceLocationException Thrown with error code
     *			CHARSET_NOT_UNDERSTOOD if the integer is not
     *			recognized.
     */

    static String decodeCharacterEncoding(int code)
        throws ServiceLocationException {

        switch (code) {
        case CHAR_ASCII:
            return ASCII;
        case CHAR_LATIN1:
            return LATIN1;
        case CHAR_UTF8:
            return UTF8;
        case CHAR_UNICODE:
            return UNICODE;
        default:
            throw unsupportedEncoding(Integer.toString(code));
        }
    }

    /**
     * Return a string holding the decimal integer that gives the
     * character's encoding in the character set passed in as encoding.
     * The encoded bytes are packed into the integer with the first
     * byte in the least significant position. For the Unicode
     * encodings the two leading flag bytes are part of the value.
     *
     * @param c The character to escape.
     * @param encoding The character set encoding to use.
     * @return The character as a decimal integer string for the encoding.
     * @exception ServiceLocationException Thrown with error code
     *			CHARSET_NOT_UNDERSTOOD if the encoding is not
     *			recognized, or with PARSE_ERROR if the character
     *			does not encode to the number of bytes expected
     *			for the encoding.
     */

    static String escapeChar(char c, String encoding)
        throws ServiceLocationException {

        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        try {
            OutputStreamWriter osw = new OutputStreamWriter(baos, encoding);

            osw.write(c);
            osw.flush();

        } catch (UnsupportedEncodingException ex) {
            throw unsupportedEncoding(encoding);

        } catch (IOException ignored) {
            // Not expected: the target is an in-memory byte array.
            // Should it happen anyway, the output is short and the
            // length checks below report it as a PARSE_ERROR.

        }

        byte[] b = baos.toByteArray();

        // Validate the number of bytes BEFORE touching them, so that an
        // unexpected encoder result surfaces as a PARSE_ERROR rather
        // than an ArrayIndexOutOfBoundsException.

        if (hasUnicodeFlag(encoding)) {

            // Two flag (control) bytes followed by two data bytes.

            if (b.length != 4) {
                throw charCodeError(c, encoding);
            }

        } else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) {

            // Single byte encodings.

            if (b.length != 1) {
                throw charCodeError(c, encoding);
            }

        } else if (encoding.equals(UTF8)) {

            // One to three bytes for a single char.

            if (b.length < 1 || b.length > 3) {
                throw charCodeError(c, encoding);
            }

        } else {

            // Java knows the encoding, but we have no rule for packing it.

            throw unsupportedEncoding(encoding);
        }

        return Integer.toString(packLowByteFirst(b));
    }

    /**
     * Unescape the character encoded as the string. This is the inverse
     * of escapeChar(): the integer is split into bytes, least
     * significant byte first, and the bytes are decoded using the
     * encoding.
     *
     * @param ch The character as a decimal integer string.
     * @param encoding The character set encoding to use.
     * @return The character, as a String.
     * @exception ServiceLocationException Thrown with PARSE_ERROR if the
     *			string can't be parsed into an integer or the
     *			integer has more bytes than the encoding allows,
     *			or with CHARSET_NOT_UNDERSTOOD if the encoding
     *			isn't recognized.
     */

    static String unescapeChar(String ch, String encoding)
        throws ServiceLocationException {

        int code;

        try {
            code = Integer.parseInt(ch);

        } catch (NumberFormatException ex) {
            throw stringCodeError(ch, encoding);

        }

        // Convert to bytes. We need to tailor the array size to the
        // number of bytes because otherwise, in encodings that
        // take less bytes, the resulting string will have garbage
        // in it.

        byte b0 = (byte) (code & 0xFF);
        byte b1 = (byte) ((code >> 8) & 0xFF);
        byte b2 = (byte) ((code >> 16) & 0xFF);
        byte b3 = (byte) ((code >> 24) & 0xFF);

        // We create an array sized to the encoding.

        byte[] b;

        if (hasUnicodeFlag(encoding)) {

            // Flag bytes plus one two-byte character.

            b = new byte[] {b0, b1, b2, b3};

        } else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) {

            // Single byte: every higher byte must be clear.

            if (b1 != 0 || b2 != 0 || b3 != 0) {
                throw stringCodeError(ch, encoding);
            }

            b = new byte[] {b0};

        } else if (encoding.equals(UTF8)) {

            // Vari-byte: at most three bytes.

            if (b3 != 0) {
                throw stringCodeError(ch, encoding);
            }

            if (b2 != 0) {
                b = new byte[] {b0, b1, b2};
            } else if (b1 != 0) {
                b = new byte[] {b0, b1};
            } else {
                b = new byte[] {b0};
            }

        } else {

            // No byte layout is defined for this encoding; fail cleanly
            // instead of handing a null array to the String constructor.

            throw unsupportedEncoding(encoding);
        }

        // Make a string out of it.

        String str = null;

        try {
            str = new String(b, encoding);

        } catch (UnsupportedEncodingException ex) {
            Assert.slpassert(false,
                             "v1_unsupported_encoding",
                             new Object[] {encoding});
        }

        return str;
    }

    // Determine from the flag bytes whether this is big or little endian
    // Unicode. If there are no flag bytes, then just return UNICODE.

    static String getUnicodeEndianess(byte[] bytes) {

        if (bytes.length >= 2) {

            if (bytes[0] == UNICODE_LITTLE_FLAG[0] &&
                bytes[1] == UNICODE_LITTLE_FLAG[1]) {
                return UNICODE_LITTLE;
            }

            if (bytes[0] == UNICODE_BIG_FLAG[0] &&
                bytes[1] == UNICODE_BIG_FLAG[1]) {
                return UNICODE_BIG;
            }
        }

        // We can't tell from the byte header, so it's big endian. But
        // since we need to add the byte header, we say we don't know.

        return UNICODE;

    }

    // Add the big endian flag to a Unicode string. Returns a new array
    // holding the two flag bytes followed by a copy of the argument;
    // the argument itself is not modified.

    static byte[] addBigEndianFlag(byte[] bytes) {

        byte[] flaggedBytes = new byte[bytes.length + 2];

        flaggedBytes[0] = UNICODE_BIG_FLAG[0];
        flaggedBytes[1] = UNICODE_BIG_FLAG[1];

        // Source is the caller's data, destination is the new array,
        // starting just past the two flag bytes.

        System.arraycopy(bytes, 0, flaggedBytes, 2, bytes.length);

        return flaggedBytes;

    }

    // True for the Unicode descriptors that escapeChar()/unescapeChar()
    // treat as two flag bytes followed by two data bytes.

    private static boolean hasUnicodeFlag(String encoding) {

        return encoding.equals(UNICODE) ||
            encoding.equals(UNICODE_BIG) ||
            encoding.equals(UNICODE_LITTLE);
    }

    // Pack up to four bytes into an int, first byte least significant.

    private static int packLowByteFirst(byte[] b) {

        int code = 0;

        for (int i = 0; i < b.length; i++) {
            code |= (b[i] & 0xFF) << (8 * i);
        }

        return code;
    }

    // Build the exception reporting an unrecognized encoding or code.

    private static ServiceLocationException
        unsupportedEncoding(Object what) {

        return new ServiceLocationException(
                                CHARSET_NOT_UNDERSTOOD,
                                "v1_unsupported_encoding",
                                new Object[] {what});
    }

    // Build the exception reporting a character that cannot be escaped.

    private static ServiceLocationException
        charCodeError(char c, String encoding) {

        return new ServiceLocationException(
                                ServiceLocationException.PARSE_ERROR,
                                "v1_charcode_error",
                                new Object[] {new Character(c), encoding});
    }

    // Build the exception reporting a string that cannot be unescaped.

    private static ServiceLocationException
        stringCodeError(String ch, String encoding) {

        return new ServiceLocationException(
                                ServiceLocationException.PARSE_ERROR,
                                "v1_stringcode_error",
                                new Object[] {ch, encoding});
    }
}