xref: /freebsd/crypto/krb5/src/include/k5-utf8.h (revision 7f2fe78b9dd5f51c821d771b63d2e096f6fd49e9)
1 /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  * Copyright (C) 2008 by the Massachusetts Institute of Technology,
4  * Cambridge, MA, USA.  All Rights Reserved.
5  *
6  * This software is being provided to you, the LICENSEE, by the
7  * Massachusetts Institute of Technology (M.I.T.) under the following
8  * license.  By obtaining, using and/or copying this software, you agree
9  * that you have read, understood, and will comply with these terms and
10  * conditions:
11  *
12  * Export of this software from the United States of America may
13  * require a specific license from the United States Government.
14  * It is the responsibility of any person or organization contemplating
15  * export to obtain such a license before exporting.
16  *
17  * WITHIN THAT CONSTRAINT, permission to use, copy, modify and distribute
18  * this software and its documentation for any purpose and without fee or
19  * royalty is hereby granted, provided that you agree to comply with the
20  * following copyright notice and statements, including the disclaimer, and
21  * that the same appear on ALL copies of the software and documentation,
22  * including modifications that you make for internal use or for
23  * distribution:
24  *
25  * THIS SOFTWARE IS PROVIDED "AS IS", AND M.I.T. MAKES NO REPRESENTATIONS
26  * OR WARRANTIES, EXPRESS OR IMPLIED.  By way of example, but not
27  * limitation, M.I.T. MAKES NO REPRESENTATIONS OR WARRANTIES OF
28  * MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF
29  * THE LICENSED SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
30  * PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
31  *
32  * The name of the Massachusetts Institute of Technology or M.I.T. may NOT
33  * be used in advertising or publicity pertaining to distribution of the
34  * software.  Title to copyright in this software and any associated
35  * documentation shall at all times remain with M.I.T., and USER agrees to
36  * preserve same.
37  *
38  * Furthermore if you modify this software you must label
39  * your software as modified software and not distribute it in such a
40  * fashion that it might be confused with the original M.I.T. software.
41  */
42 /*
43  * Copyright 1998-2008 The OpenLDAP Foundation.
44  * All rights reserved.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted only as authorized by the OpenLDAP
48  * Public License.
49  *
50  * A copy of this license is available in file LICENSE in the
51  * top-level directory of the distribution or, alternatively, at
52  * <https://www.OpenLDAP.org/license.html>.
53  */
54 /*
55  * Copyright (C) 2000 Novell, Inc. All Rights Reserved.
56  *
57  * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES.
58  * USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION
59  * 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT
60  * HTTPS://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE
61  * TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS
62  * WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC
63  * LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE
64  * PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
65  */
66 /* This work is part of OpenLDAP Software <https://www.openldap.org/>. */
67 
68 #ifndef K5_UTF8_H
69 #define K5_UTF8_H
70 
71 #include "k5-platform.h"
72 
/*
 * Fixed-width unsigned integer types for UCS-2 (16-bit) and UCS-4 (32-bit)
 * character values.
 */
typedef uint16_t krb5_ucs2;
typedef uint32_t krb5_ucs4;
75 
76 int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
77 size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
78 
/*
 * Convert a little-endian UTF-16 string to an allocated null-terminated UTF-8
 * string.  utf16bytes points to the input and nbytes is its length in bytes,
 * which must be an even number.  The result is passed back through utf8_out.
 * Return EINVAL on invalid input, ENOMEM on out of memory, or 0 on success.
 *
 * NOTE(review): presumably the caller releases *utf8_out with free(); confirm
 * against the implementation.
 */
int k5_utf16le_to_utf8(const uint8_t *utf16bytes, size_t nbytes,
                       char **utf8_out);
87 
/*
 * Convert a UTF-8 string to an allocated little-endian UTF-16 string.  The
 * result is passed back through utf16_out and its length through nbytes_out;
 * the length is in bytes and will always be even.  Return EINVAL on invalid
 * input, ENOMEM on out of memory, or 0 on success.
 *
 * NOTE(review): presumably the caller releases *utf16_out with free(); confirm
 * against the implementation.
 */
int k5_utf8_to_utf16le(const char *utf8, uint8_t **utf16_out,
                       size_t *nbytes_out);
95 
/*
 * Optimizations: lookup tables used by the KRB5_UTF8_CHARLEN macros.  They
 * are only declared here; the definitions live elsewhere.
 *
 * krb5int_utf8_lentab is indexed by a non-ASCII lead byte XOR 0x80 and
 * supplies the value KRB5_UTF8_CHARLEN yields for that byte.
 * krb5int_utf8_mintab is indexed by the low five bits of the lead byte and
 * supplies the mask KRB5_UTF8_CHARLEN2 tests against the second byte.
 */
extern const char krb5int_utf8_lentab[128];
extern const char krb5int_utf8_mintab[32];
99 
/* Value of the byte at p, read as an unsigned char. */
#define KRB5_UTF8_BV(p) (*(const unsigned char *)(p))
/* True if the byte at p has its high bit (0x80) clear. */
#define KRB5_UTF8_ISASCII(p) (!(KRB5_UTF8_BV(p) & 0x80))
/*
 * Length of the character whose first byte is at p: 1 when that byte is
 * ASCII, otherwise the krb5int_utf8_lentab entry for it.  p may be evaluated
 * more than once, so it must not have side effects.
 */
#define KRB5_UTF8_CHARLEN(p) (KRB5_UTF8_ISASCII(p) ? 1 :                \
                              krb5int_utf8_lentab[KRB5_UTF8_BV(p) ^ 0x80])
104 
/*
 * This is like CHARLEN but additionally validates to make sure the char used
 * the shortest possible encoding; it evaluates to 0 when that check fails.
 * 'l' must be a modifiable lvalue; it temporarily holds the result of CHARLEN.
 * The second byte (p[1]) is examined only when CHARLEN yields 3 or more, and
 * is read as an unsigned char like the lead byte.  p may be evaluated more
 * than once, so it must not have side effects.
 */
#define KRB5_UTF8_CHARLEN2(p, l) (                                      \
        (((l) = KRB5_UTF8_CHARLEN(p)) < 3 ||                            \
         (krb5int_utf8_mintab[KRB5_UTF8_BV(p) & 0x1f] &                 \
          ((const unsigned char *)(p))[1])) ?                           \
        (l) : 0)
113 
/*
 * True if c is an uppercase letter, 'A' through 'Z'.  This macro assumes c is
 * an ASCII character and assumes the "C" locale.  c may be evaluated twice.
 */
#define KRB5_UPPER(c)           ((c) >= 'A' && (c) <= 'Z')
119 
120 #endif /* K5_UTF8_H */
121