1 /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* 3 * Copyright (C) 2008 by the Massachusetts Institute of Technology, 4 * Cambridge, MA, USA. All Rights Reserved. 5 * 6 * This software is being provided to you, the LICENSEE, by the 7 * Massachusetts Institute of Technology (M.I.T.) under the following 8 * license. By obtaining, using and/or copying this software, you agree 9 * that you have read, understood, and will comply with these terms and 10 * conditions: 11 * 12 * Export of this software from the United States of America may 13 * require a specific license from the United States Government. 14 * It is the responsibility of any person or organization contemplating 15 * export to obtain such a license before exporting. 16 * 17 * WITHIN THAT CONSTRAINT, permission to use, copy, modify and distribute 18 * this software and its documentation for any purpose and without fee or 19 * royalty is hereby granted, provided that you agree to comply with the 20 * following copyright notice and statements, including the disclaimer, and 21 * that the same appear on ALL copies of the software and documentation, 22 * including modifications that you make for internal use or for 23 * distribution: 24 * 25 * THIS SOFTWARE IS PROVIDED "AS IS", AND M.I.T. MAKES NO REPRESENTATIONS 26 * OR WARRANTIES, EXPRESS OR IMPLIED. By way of example, but not 27 * limitation, M.I.T. MAKES NO REPRESENTATIONS OR WARRANTIES OF 28 * MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF 29 * THE LICENSED SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY 30 * PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. 31 * 32 * The name of the Massachusetts Institute of Technology or M.I.T. may NOT 33 * be used in advertising or publicity pertaining to distribution of the 34 * software. Title to copyright in this software and any associated 35 * documentation shall at all times remain with M.I.T., and USER agrees to 36 * preserve same. 37 * 38 * Furthermore if you modify this software you must label 39 * your software as modified software and not distribute it in such a 40 * fashion that it might be confused with the original M.I.T. software. 41 */ 42 /* 43 * Copyright 1998-2008 The OpenLDAP Foundation. 44 * All rights reserved. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted only as authorized by the OpenLDAP 48 * Public License. 49 * 50 * A copy of this license is available in file LICENSE in the 51 * top-level directory of the distribution or, alternatively, at 52 * <https://www.OpenLDAP.org/license.html>. 53 */ 54 /* 55 * Copyright (C) 2000 Novell, Inc. All Rights Reserved. 56 * 57 * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES. 58 * USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION 59 * 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT 60 * HTTPS://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE 61 * TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS 62 * WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC 63 * LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE 64 * PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY. 65 */ 66 /* This work is part of OpenLDAP Software <https://www.openldap.org/>. */ 67 68 #ifndef K5_UTF8_H 69 #define K5_UTF8_H 70 71 #include "k5-platform.h" 72 73 typedef uint16_t krb5_ucs2; 74 typedef uint32_t krb5_ucs4; 75 76 int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out); 77 size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf); 78 79 /* 80 * Convert a little-endian UTF-16 string to an allocated null-terminated UTF-8 81 * string. nbytes is the length of ucs2bytes in bytes, and must be an even 82 * number. Return EINVAL on invalid input, ENOMEM on out of memory, or 0 on 83 * success. 84 */ 85 int k5_utf16le_to_utf8(const uint8_t *utf16bytes, size_t nbytes, 86 char **utf8_out); 87 88 /* 89 * Convert a UTF-8 string to an allocated little-endian UTF-16 string. The 90 * resulting length is in bytes and will always be even. Return EINVAL on 91 * invalid input, ENOMEM on out of memory, or 0 on success. 92 */ 93 int k5_utf8_to_utf16le(const char *utf8, uint8_t **utf16_out, 94 size_t *nbytes_out); 95 96 /* Optimizations */ 97 extern const char krb5int_utf8_lentab[128]; 98 extern const char krb5int_utf8_mintab[32]; 99 100 #define KRB5_UTF8_BV(p) (*(const unsigned char *)(p)) 101 #define KRB5_UTF8_ISASCII(p) (!(KRB5_UTF8_BV(p) & 0x80)) 102 #define KRB5_UTF8_CHARLEN(p) (KRB5_UTF8_ISASCII(p) ? 1 : \ 103 krb5int_utf8_lentab[KRB5_UTF8_BV(p) ^ 0x80]) 104 105 /* This is like CHARLEN but additionally validates to make sure 106 * the char used the shortest possible encoding. 107 * 'l' is used to temporarily hold the result of CHARLEN. 108 */ 109 #define KRB5_UTF8_CHARLEN2(p, l) ( \ 110 ((l = KRB5_UTF8_CHARLEN(p)) < 3 || \ 111 (krb5int_utf8_mintab[KRB5_UTF8_BV(p) & 0x1f] & (p)[1])) ? \ 112 l : 0) 113 114 /* 115 * these macros assume 'x' is an ASCII x 116 * and assume the "C" locale 117 */ 118 #define KRB5_UPPER(c) ((c) >= 'A' && (c) <= 'Z') 119 120 #endif /* K5_UTF8_H */ 121