xref: /freebsd/crypto/heimdal/lib/wind/punycode.c (revision 6a068746777241722b2b32c5d0bc443a2a64d80b)
/*
 * Copyright (c) 2004 Kungliga Tekniska Högskolan
 * (Royal Institute of Technology, Stockholm, Sweden).
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33*ae771770SStanislav Sedov 
34*ae771770SStanislav Sedov #ifdef HAVE_CONFIG_H
35*ae771770SStanislav Sedov #include <config.h>
36*ae771770SStanislav Sedov #endif
37*ae771770SStanislav Sedov #include <string.h>
38*ae771770SStanislav Sedov #include "windlocl.h"
39*ae771770SStanislav Sedov 
/*
 * Punycode parameter values (these are the values RFC 3492 section 5
 * specifies for Punycode).
 */
static const unsigned base         = 36;	/* number of distinct digits */
static const unsigned t_min        = 1;		/* lowest digit threshold */
static const unsigned t_max        = 26;	/* highest digit threshold */
static const unsigned skew         = 38;	/* divisor offset in adapt() */
static const unsigned damp         = 700;	/* scaling of the first delta */
static const unsigned initial_n    = 128;	/* first non-basic code point */
static const unsigned initial_bias = 72;	/* bias before any adaptation */
47*ae771770SStanislav Sedov 
/*
 * Map a punycode digit value to the character that represents it:
 * 0..25 -> 'a'..'z', 26..35 -> '0'..'9'.
 *
 * The argument is not range checked; n must be below 36, otherwise
 * the lookup reads past the digit alphabet.
 */
static unsigned
digit(unsigned n)
{
    return "abcdefghijklmnopqrstuvwxyz0123456789"[n];
}
53*ae771770SStanislav Sedov 
54*ae771770SStanislav Sedov static unsigned
adapt(unsigned delta,unsigned numpoints,int first)55*ae771770SStanislav Sedov adapt(unsigned delta, unsigned numpoints, int first)
56*ae771770SStanislav Sedov {
57*ae771770SStanislav Sedov     unsigned k;
58*ae771770SStanislav Sedov 
59*ae771770SStanislav Sedov     if (first)
60*ae771770SStanislav Sedov 	delta = delta / damp;
61*ae771770SStanislav Sedov     else
62*ae771770SStanislav Sedov 	delta /= 2;
63*ae771770SStanislav Sedov     delta += delta / numpoints;
64*ae771770SStanislav Sedov     k = 0;
65*ae771770SStanislav Sedov     while (delta > ((base - t_min) * t_max) / 2) {
66*ae771770SStanislav Sedov 	delta /= base - t_min;
67*ae771770SStanislav Sedov 	k += base;
68*ae771770SStanislav Sedov     }
69*ae771770SStanislav Sedov     return k + (((base - t_min + 1) * delta) / (delta + skew));
70*ae771770SStanislav Sedov }
71*ae771770SStanislav Sedov 
72*ae771770SStanislav Sedov /**
73*ae771770SStanislav Sedov  * Convert an UCS4 string to a puny-coded DNS label string suitable
74*ae771770SStanislav Sedov  * when combined with delimiters and other labels for DNS lookup.
75*ae771770SStanislav Sedov  *
76*ae771770SStanislav Sedov  * @param in an UCS4 string to convert
77*ae771770SStanislav Sedov  * @param in_len the length of in.
78*ae771770SStanislav Sedov  * @param out the resulting puny-coded string. The string is not NUL
79*ae771770SStanislav Sedov  * terminatied.
80*ae771770SStanislav Sedov  * @param out_len before processing out_len should be the length of
81*ae771770SStanislav Sedov  * the out variable, after processing it will be the length of the out
82*ae771770SStanislav Sedov  * string.
83*ae771770SStanislav Sedov  *
84*ae771770SStanislav Sedov  * @return returns 0 on success, an wind error code otherwise
85*ae771770SStanislav Sedov  * @ingroup wind
86*ae771770SStanislav Sedov  */
87*ae771770SStanislav Sedov 
88*ae771770SStanislav Sedov int
wind_punycode_label_toascii(const uint32_t * in,size_t in_len,char * out,size_t * out_len)89*ae771770SStanislav Sedov wind_punycode_label_toascii(const uint32_t *in, size_t in_len,
90*ae771770SStanislav Sedov 			    char *out, size_t *out_len)
91*ae771770SStanislav Sedov {
92*ae771770SStanislav Sedov     unsigned n     = initial_n;
93*ae771770SStanislav Sedov     unsigned delta = 0;
94*ae771770SStanislav Sedov     unsigned bias  = initial_bias;
95*ae771770SStanislav Sedov     unsigned h = 0;
96*ae771770SStanislav Sedov     unsigned b;
97*ae771770SStanislav Sedov     unsigned i;
98*ae771770SStanislav Sedov     unsigned o = 0;
99*ae771770SStanislav Sedov     unsigned m;
100*ae771770SStanislav Sedov 
101*ae771770SStanislav Sedov     for (i = 0; i < in_len; ++i) {
102*ae771770SStanislav Sedov 	if (in[i] < 0x80) {
103*ae771770SStanislav Sedov 	    ++h;
104*ae771770SStanislav Sedov 	    if (o >= *out_len)
105*ae771770SStanislav Sedov 		return WIND_ERR_OVERRUN;
106*ae771770SStanislav Sedov 	    out[o++] = in[i];
107*ae771770SStanislav Sedov 	}
108*ae771770SStanislav Sedov     }
109*ae771770SStanislav Sedov     b = h;
110*ae771770SStanislav Sedov     if (b > 0) {
111*ae771770SStanislav Sedov 	if (o >= *out_len)
112*ae771770SStanislav Sedov 	    return WIND_ERR_OVERRUN;
113*ae771770SStanislav Sedov 	out[o++] = 0x2D;
114*ae771770SStanislav Sedov     }
115*ae771770SStanislav Sedov     /* is this string punycoded */
116*ae771770SStanislav Sedov     if (h < in_len) {
117*ae771770SStanislav Sedov 	if (o + 4 >= *out_len)
118*ae771770SStanislav Sedov 	    return WIND_ERR_OVERRUN;
119*ae771770SStanislav Sedov 	memmove(out + 4, out, o);
120*ae771770SStanislav Sedov 	memcpy(out, "xn--", 4);
121*ae771770SStanislav Sedov 	o += 4;
122*ae771770SStanislav Sedov     }
123*ae771770SStanislav Sedov 
124*ae771770SStanislav Sedov     while (h < in_len) {
125*ae771770SStanislav Sedov 	m = (unsigned)-1;
126*ae771770SStanislav Sedov 	for (i = 0; i < in_len; ++i)
127*ae771770SStanislav Sedov 	    if(in[i] < m && in[i] >= n)
128*ae771770SStanislav Sedov 		m = in[i];
129*ae771770SStanislav Sedov 
130*ae771770SStanislav Sedov 	delta += (m - n) * (h + 1);
131*ae771770SStanislav Sedov 	n = m;
132*ae771770SStanislav Sedov 	for (i = 0; i < in_len; ++i) {
133*ae771770SStanislav Sedov 	    if (in[i] < n) {
134*ae771770SStanislav Sedov 		++delta;
135*ae771770SStanislav Sedov 	    } else if (in[i] == n) {
136*ae771770SStanislav Sedov 		unsigned q = delta;
137*ae771770SStanislav Sedov 		unsigned k;
138*ae771770SStanislav Sedov 		for (k = base; ; k += base) {
139*ae771770SStanislav Sedov 		    unsigned t;
140*ae771770SStanislav Sedov 		    if (k <= bias)
141*ae771770SStanislav Sedov 			t = t_min;
142*ae771770SStanislav Sedov 		    else if (k >= bias + t_max)
143*ae771770SStanislav Sedov 			t = t_max;
144*ae771770SStanislav Sedov 		    else
145*ae771770SStanislav Sedov 			t = k - bias;
146*ae771770SStanislav Sedov 		    if (q < t)
147*ae771770SStanislav Sedov 			break;
148*ae771770SStanislav Sedov 		    if (o >= *out_len)
149*ae771770SStanislav Sedov 			return WIND_ERR_OVERRUN;
150*ae771770SStanislav Sedov 		    out[o++] = digit(t + ((q - t) % (base - t)));
151*ae771770SStanislav Sedov 		    q = (q - t) / (base - t);
152*ae771770SStanislav Sedov 		}
153*ae771770SStanislav Sedov 		if (o >= *out_len)
154*ae771770SStanislav Sedov 		    return WIND_ERR_OVERRUN;
155*ae771770SStanislav Sedov 		out[o++] = digit(q);
156*ae771770SStanislav Sedov 		/* output */
157*ae771770SStanislav Sedov 		bias = adapt(delta, h + 1, h == b);
158*ae771770SStanislav Sedov 		delta = 0;
159*ae771770SStanislav Sedov 		++h;
160*ae771770SStanislav Sedov 	    }
161*ae771770SStanislav Sedov 	}
162*ae771770SStanislav Sedov 	++delta;
163*ae771770SStanislav Sedov 	++n;
164*ae771770SStanislav Sedov     }
165*ae771770SStanislav Sedov 
166*ae771770SStanislav Sedov     *out_len = o;
167*ae771770SStanislav Sedov     return 0;
168*ae771770SStanislav Sedov }
169