1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2001 by Sun Microsystems, Inc. 23 * All rights reserved. 24 * 25 */ 26 27 // IANACharCode.java: SLPv1 Character encoding support 28 // Author: James Kempf 29 // Created On: Fri Sep 11 13:24:02 1998 30 // Last Modified By: James Kempf 31 // Last Modified On: Wed Oct 28 14:33:02 1998 32 // Update Count: 7 33 // 34 35 36 package com.sun.slp; 37 38 import java.util.*; 39 import java.io.*; 40 41 /** 42 * The IANACharCode class supports static methods for decoding IANA 43 * character codes into strings appropriate for the Java Writer subclass 44 * encoding String arguments, and for encoding the String descriptions 45 * of character codings into the integer codes. Ideally, Java itself 46 * should support this. 47 * 48 * @author James Kempf 49 */ 50 51 abstract class IANACharCode extends Object { 52 53 // Character code descriptors. These can be used with the Java 54 // character encoding utilities. For Unicode, we use little on 55 // input, 56 57 static final String ASCII = "Default"; 58 static final String LATIN1 = "latin1"; 59 static final String UTF8 = "UTF8"; 60 static final String UNICODE = "Unicode"; 61 static final String UNICODE_LITTLE = "UnicodeLittle"; 62 static final String UNICODE_BIG = "UnicodeBig"; 63 static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr"; 64 65 // Error code for misidentified character set. 66 67 static final short CHARSET_NOT_UNDERSTOOD = 5; 68 69 // Character codes. 70 71 protected static final int CHAR_ASCII = 3; 72 protected static final int CHAR_LATIN1 = 4; 73 protected static final int CHAR_UTF8 = 6; 74 protected static final int CHAR_UNICODE = 1000; 75 76 // First two bytes indicate that string is big/little endian Unicode. 77 // If this flag isn't set, then big endian is assumed and we 78 // must add the big endian bytes on every call. 79 80 protected static final byte[] UNICODE_LITTLE_FLAG = 81 {(byte)0xFF, (byte)0xFE}; 82 83 protected static final byte[] UNICODE_BIG_FLAG = 84 {(byte)0xFE, (byte)0xFF}; 85 86 /** 87 * Encode the String describing a character encoding into 88 * the approprate integer descriptor code. 89 * 90 * @param encoding The String describing the encoding. 91 * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the 92 * String is not recognized. 93 */ 94 95 static int encodeCharacterEncoding(String encoding) 96 throws ServiceLocationException { 97 98 if (encoding.equals(ASCII)) { 99 return CHAR_ASCII; 100 } else if (encoding.equals(LATIN1)) { 101 return CHAR_LATIN1; 102 } else if (encoding.equals(UTF8)) { 103 return CHAR_UTF8; 104 } else if (encoding.equals(UNICODE)) { 105 return CHAR_UNICODE; 106 } else if (encoding.equals(UNICODE_BIG)) { 107 return CHAR_UNICODE; 108 } else if (encoding.equals(UNICODE_LITTLE)) { 109 return CHAR_UNICODE; 110 } else if (encoding.equals(UNICODE_BIG_NO_HDR)) { 111 return CHAR_UNICODE; 112 } 113 114 throw 115 new ServiceLocationException( 116 CHARSET_NOT_UNDERSTOOD, 117 "v1_unsupported_encoding", 118 new Object[] {encoding}); 119 } 120 121 /** 122 * Decode the integer describing a character encoding into 123 * the approprate String descriptor. 124 * 125 * @param code The integer coding the String set. 126 * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the 127 * integer is not recognized. 128 */ 129 130 static String decodeCharacterEncoding(int code) 131 throws ServiceLocationException { 132 133 switch (code) { 134 case CHAR_ASCII: return ASCII; 135 case CHAR_LATIN1: return LATIN1; 136 case CHAR_UTF8: return UTF8; 137 case CHAR_UNICODE: return UNICODE; 138 } 139 140 throw 141 new ServiceLocationException( 142 CHARSET_NOT_UNDERSTOOD, 143 "v1_unsupported_encoding", 144 new Object[] {Integer.toString(code)}); 145 } 146 147 /** 148 * Return a string of integers giving the character's encoding in 149 * the character set passed in as encoding. 150 * 151 * @param c The character to escape. 152 * @param encoding The character set encoding to use. 153 * @return The character as a string of integers for the encoding. 154 * @exception ServiceLocationException Thrown if the encoding is not 155 * recognized, if the character's encoding 156 * has more than 8 bytes or if the sign bit gets turned on. 157 */ 158 159 static String escapeChar(char c, String encoding) 160 throws ServiceLocationException { 161 162 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 163 164 try { 165 OutputStreamWriter osw = new OutputStreamWriter(baos, encoding); 166 167 osw.write(c); 168 osw.flush(); 169 170 } catch (UnsupportedEncodingException ex) { 171 172 throw 173 new ServiceLocationException( 174 CHARSET_NOT_UNDERSTOOD, 175 "v1_unsupported_encoding", 176 new Object[] {encoding}); 177 178 } catch (IOException ex) { 179 180 } 181 182 byte b[] = baos.toByteArray(); 183 int code = 0; 184 185 // Assemble the character code based on the encoding type. 186 187 if (encoding.equals(UNICODE) || 188 encoding.equals(UNICODE_BIG) || 189 encoding.equals(UNICODE_LITTLE)) { 190 191 code = (int)(b[0] & 0xFF); // control bytes... 192 code = (int)(code | ((b[1] & 0xFF) << 8)); 193 code = (int)(code | ((b[2] & 0xFF) << 16)); 194 code = (int)(code | ((b[3] & 0xFF) << 24)); 195 196 if (b.length <= 4) { 197 throw 198 new ServiceLocationException( 199 ServiceLocationException.PARSE_ERROR, 200 "v1_charcode_error", 201 new Object[] {new Character(c), encoding}); 202 } 203 204 } else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) { 205 206 code = (int)(b[0] & 0xFF); 207 208 if (b.length > 1) { 209 throw 210 new ServiceLocationException( 211 ServiceLocationException.PARSE_ERROR, 212 "v1_charcode_error", 213 new Object[] {new Character(c), encoding}); 214 } 215 } else if (encoding.equals(UTF8)) { 216 217 if (b.length > 3) { 218 throw 219 new ServiceLocationException( 220 ServiceLocationException.PARSE_ERROR, 221 "v1_charcode_error", 222 new Object[] {new Character(c), encoding}); 223 } 224 225 226 code = (int)(b[0] & 0xFF); 227 228 if (b.length > 1) { 229 code = (int)(code | ((b[1] & 0xFF) << 8)); 230 } 231 232 if (b.length > 2) { 233 code = (int)(code | ((b[2] & 0xFF) << 16)); 234 } 235 } 236 237 return Integer.toString(code); 238 } 239 240 /** 241 * Unescape the character encoded as the string. 242 * 243 * @param ch The character as a string of Integers. 244 * @param encoding The character set encoding to use. 245 * @return The character. 246 * @exception ServiceLocationException Thrown if the string can't 247 * be parsed into an integer or if the encoding isn't 248 * recognized. 249 */ 250 251 static String unescapeChar(String ch, String encoding) 252 throws ServiceLocationException { 253 254 int code = 0; 255 256 try { 257 code = Integer.parseInt(ch); 258 259 } catch (NumberFormatException ex) { 260 throw 261 new ServiceLocationException( 262 ServiceLocationException.PARSE_ERROR, 263 "v1_stringcode_error", 264 new Object[] {ch, encoding}); 265 266 } 267 268 // Convert to bytes. We need to taylor the array size to the 269 // number of bytes because otherwise, in encodings that 270 // take less bytes, the resulting string will have garbage 271 // in it. 272 273 String str = null; 274 byte b0 = 0, b1 = 0, b2 = 0, b3 = 0; 275 byte b[] = null; 276 277 b0 = (byte) (code & 0xFF); 278 b1 = (byte) ((code >> 8) & 0xFF); 279 b2 = (byte) ((code >> 16) & 0xFF); 280 b3 = (byte) ((code >> 24) & 0xFf); 281 282 // We create an array sized to the encoding. 283 284 if (encoding.equals(UNICODE_BIG) || 285 encoding.equals(UNICODE_LITTLE)) { 286 b = new byte[4]; 287 b[0] = b0; 288 b[1] = b1; 289 b[2] = b2; 290 b[3] = b3; 291 292 } else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) { 293 // single byte 294 b = new byte[1]; 295 b[0] = b0; 296 297 if (b1 != 0 || b2 != 0) { 298 throw 299 new ServiceLocationException( 300 ServiceLocationException.PARSE_ERROR, 301 "v1_stringcode_error", 302 new Object[] {ch, encoding}); 303 } 304 305 306 } else if (encoding.equals(UTF8)) {// vari-byte 307 308 if (b3 != 0) { 309 throw 310 new ServiceLocationException( 311 ServiceLocationException.PARSE_ERROR, 312 "v1_stringcode_error", 313 new Object[] {ch, encoding}); 314 } 315 316 if (b2 != 0) { 317 b = new byte[3]; 318 b[2] = b2; 319 b[1] = b1; 320 b[0] = b0; 321 } else if (b1 != 0) { 322 b = new byte[2]; 323 b[1] = b1; 324 b[0] = b0; 325 } else { 326 b = new byte[1]; 327 b[0] = b0; 328 } 329 } 330 331 // Make a string out of it. 332 333 try { 334 str = new String(b, encoding); 335 336 } catch (UnsupportedEncodingException ex) { 337 Assert.slpassert(false, 338 "v1_unsupported_encoding", 339 new Object[] {encoding}); 340 } 341 342 return str; 343 } 344 345 // Determine from the flag bytes whether this is big or little endian 346 // Unicode. If there are no flag bytes, then just return UNICODE. 347 348 static String getUnicodeEndianess(byte[] bytes) { 349 350 if (bytes.length >= 2) { 351 352 if (bytes[0] == UNICODE_LITTLE_FLAG[0] && 353 bytes[1] == UNICODE_LITTLE_FLAG[1]) { 354 return UNICODE_LITTLE; 355 356 } else if (bytes[0] == UNICODE_BIG_FLAG[0] && 357 bytes[1] == UNICODE_BIG_FLAG[1]) { 358 return UNICODE_BIG; 359 360 } 361 } 362 363 // We can`t tell from the byte header, so it's big endian. But 364 // since we need to add the byte header, we say we don't know. 365 366 return UNICODE; 367 368 } 369 370 // Add the big endian flag to a Unicode string. 371 372 static byte[] addBigEndianFlag(byte[] bytes) { 373 374 byte[] flaggedBytes = new byte[bytes.length + 2]; 375 376 flaggedBytes[0] = UNICODE_BIG_FLAG[0]; 377 flaggedBytes[1] = UNICODE_BIG_FLAG[1]; 378 379 System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length); 380 381 return flaggedBytes; 382 383 } 384 } 385