rfc3492.txt - OpenGrok cross reference for /freebsd/crypto/heimdal/lib/wind/rfc3492.txt

Lines Matching +full:- +full:u
33    literally, and non-ASCII characters are represented by ASCII
50        3.3 Generalized variable-length integers..................5
75    A. Mixed-case annotation.....................................22
84    domain names.  Labels containing non-ASCII characters can be
134    strings are case-folded prior to encoding, the basic string can use
135    mixed case to tell how to convert the folded string into a mixed-case
136    string.  See appendix A "Mixed-case annotation".
155    by "U+" followed by four to six hexadecimal digits, while a range of
191    coding" encodes the non-basic code points as deltas, and processes
194    are represented as "generalized variable-length integers", which use
214    generalized variable-length integers, described in section 3.3.  The
219    (excluding the last delimiter).  The decoder inserts non-basic code
240    number of wrap-arounds.
247    run-length encoding of this sequence of events: they are the lengths
248    of the runs of non-insertion states preceding the insertion states.
266 3.3 Generalized variable-length integers
269    distinct symbols for digits, whose values are 0 through base-1.  Let
287    when unique encodings are needed.  Second, the integer is not self-
291    The generalized variable-length representation solves these two
292    problems.  The digit values are still 0 through base-1, but now the
293    integer is self-delimiting by means of thresholds t(j), each of which
294    is in the range 0 through base-1.  Exactly one digit, the most
297    the first if they are little-endian (least significant digit first),
298    or starting with the last if they are big-endian (most significant
303       w(j) = w(j-1) * (base - t(j-1)) for j > 0
305    For example, consider the little-endian sequence of base 8 digits
307    implies that the weights are 1, 1*(8-2) = 6, 6*(8-3) = 30, 30*(8-5) =
308    90, 90*(8-5) = 270, and so on.  7 is not less than 2, and 3 is not
315    then stop, otherwise increase w by a factor of (base - t), update t
320    output the digit for t + ((N - t) mod (base - t)), then replace N
321    with (N - t) div (base - t), update t for the next position, and
325    generalized variable-length representation of each nonnegative
328    Bootstring uses little-endian ordering so that the deltas can be
333       t(j) = base * (j + 1) - bias,
376          while delta > ((base - tmin) * tmax) div 2
377          do let delta = delta div (base - tmin)
383            (((base - tmin + 1) * delta) div (delta + skew))
389       up through the one expected to be third-last, and somewhere
390       between tmin and tmax for the digit expected to be second-last
399       (balancing the hope of the expected-last digit being unnecessary
406    distinguishable basic code points remaining.  The digit-values in the
407    range 0 through base-1 need to be associated with distinct non-
409    to have the same digit-value; for example, uppercase and lowercase
411    are case-insensitive.
413    The initial value of n cannot be greater than the minimum non-basic
419       0 <= tmin <= tmax <= base-1
422       initial_bias mod base <= base - tmin
427    If support for mixed-case annotation is desired (see appendix A),
428    make sure that the code points corresponding to 0 through tmax-1 all
455    use by the UTF-16 encoding of Unicode).  The basic code points are
456    the ASCII [ASCII] code points (0..7F), of which U+002D (-) is the
457    delimiter, and some of the others have digit-values as follows:
459       code points    digit-values
460       ------------   ----------------------
461       41..5A (A-Z) =  0 to 25, respectively
462       61..7A (a-z) =  0 to 25, respectively
463       30..39 (0-9) = 26 to 35, respectively
465    Using hyphen-minus as the delimiter implies that the encoded string
466    can end with a hyphen-minus only if the Unicode string consists
468    being encoded.  The encoded string can begin with a hyphen-minus, but
471    hyphen-minus [RFC952].
475    only uppercase forms or only lowercase forms, unless it uses mixed-
487       U V
523      while delta > ((base - tmin) * tmax) div 2 do begin
524        let delta = delta div (base - tmin)
527      return k + (((base - tmin + 1) * delta) div (delta + skew))
574      and copy them to output, fail on any non-basic code point
582        let digit = the code point's digit-value, fail if it has none
585                tmax if k >= bias + tmax, or k - bias otherwise
587        let w = w * (base - t), fail on overflow
589      let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
610    only error conditions are invalid code points, unexpected end-of-
614    inputs.  Without this property it would have been necessary to re-
633    {if the input contains a non-basic code point < n then fail}
635      let m = the minimum {non-basic} code point >= n in the input
636      let delta = delta + (m - n) * (h + 1), fail on overflow
644                    tmax if k >= bias + tmax, or k - bias otherwise
646            output the code point for digit t + ((q - t) mod (base - t))
647            let q = (q - t) div (base - t)
659    contains a non-basic code point less than n) can be omitted if all
663    The brace-enclosed conditions "non-basic" and "or c is basic" can be
693    For IDNA, 26-bit unsigned integers are sufficient to handle all valid
694    IDNA labels without overflow, because any string that needed a 27-bit
703    overflows if and only if B > maxint - A, and A + (B * C) overflows if
704    and only if B > (maxint - A) div C, where maxint is the greatest
714    L, then no delta could ever exceed (M - initial_n) * (L + 1), and
721    number of digits in a variable-length integer (that is, limiting the
736    but to check the final output string by re-encoding it and comparing
738    case-insensitive ASCII comparison) overflow has occurred.  This
739    delayed-detection approach would not impose any more restrictions on
740    the input than the immediate-detection approach does, and might be
745    ToUnicode performs a higher level re-encoding and comparison, and a
763        u+0644 u+064A u+0647 u+0645 u+0627 u+0628 u+062A u+0643 u+0644
764        u+0645 u+0648 u+0634 u+0639 u+0631 u+0628 u+064A u+061F
768        u+4ED6 u+4EEC u+4E3A u+4EC0 u+4E48 u+4E0D u+8BF4 u+4E2D u+6587
772        u+4ED6 u+5011 u+7232 u+4EC0 u+9EBD u+4E0D u+8AAA u+4E2D u+6587
776        U+0050 u+0072 u+006F u+010D u+0070 u+0072 u+006F u+0073 u+0074
777        u+011B u+006E u+0065 u+006D u+006C u+0075 u+0076 u+00ED u+010D
778        u+0065 u+0073 u+006B u+0079
779        Punycode: Proprostnemluvesky-uyb24dma41a
792        u+05DC u+05DE u+05D4 u+05D4 u+05DD u+05E4 u+05E9 u+05D5 u+05D8
793        u+05DC u+05D0 u+05DE u+05D3 u+05D1 u+05E8 u+05D9 u+05DD u+05E2
794        u+05D1 u+05E8 u+05D9 u+05EA
798        u+092F u+0939 u+0932 u+094B u+0917 u+0939 u+093F u+0928 u+094D
799        u+0926 u+0940 u+0915 u+094D u+092F u+094B u+0902 u+0928 u+0939
800        u+0940 u+0902 u+092C u+094B u+0932 u+0938 u+0915 u+0924 u+0947
801        u+0939 u+0948 u+0902
805        u+306A u+305C u+307F u+3093 u+306A u+65E5 u+672C u+8A9E u+3092
806        u+8A71 u+3057 u+3066 u+304F u+308C u+306A u+3044 u+306E u+304B
810        u+C138 u+ACC4 u+C758 u+BAA8 u+B4E0 u+C0AC u+B78C u+B4E4 u+C774
811        u+D55C u+AD6D u+C5B4 u+B97C u+C774 u+D574 u+D55C u+B2E4 u+BA74
812        u+C5BC u+B9C8 u+B098 u+C88B u+C744 u+AE4C
817        U+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E
818        u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440
819        u+044F u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A
820        u+0438
824        U+0050 u+006F u+0072 u+0071 u+0075 u+00E9 u+006E u+006F u+0070
825        u+0075 u+0065 u+0064 u+0065 u+006E u+0073 u+0069 u+006D u+0070
826        u+006C u+0065 u+006D u+0065 u+006E u+0074 u+0065 u+0068 u+0061
827        u+0062 u+006C u+0061 u+0072 u+0065 u+006E U+0045 u+0073 u+0070
828        u+0061 u+00F1 u+006F u+006C
829        Punycode: PorqunopuedensimplementehablarenEspaol-fmd56a
834        U+0054 u+1EA1 u+0069 u+0073 u+0061 u+006F u+0068 u+1ECD u+006B
835        u+0068 u+00F4 u+006E u+0067 u+0074 u+0068 u+1EC3 u+0063 u+0068
836        u+1EC9 u+006E u+00F3 u+0069 u+0074 u+0069 u+1EBF u+006E u+0067
837        U+0056 u+0069 u+1EC7 u+0074
838        Punycode: TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g
849    them handy (but Japanese is useful for providing examples of single-
850    row text, two-row text, ideographic text, and various mixtures
854        u+0033 u+5E74 U+0042 u+7D44 u+91D1 u+516B u+5148 u+751F
855        Punycode: 3B-ww4c5e180e575a65lsy2b
857    (M) <amuro><namie>-with-SUPER-MONKEYS
858        u+5B89 u+5BA4 u+5948 u+7F8E u+6075 u+002D u+0077 u+0069 u+0074
859        u+0068 u+002D U+0053 U+0055 U+0050 U+0045 U+0052 u+002D U+004D
860        U+004F U+004E U+004B U+0045 U+0059 U+0053
861        Punycode: -with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
863    (N) Hello-Another-Way-<sorezore><no><basho>
864        U+0048 u+0065 u+006C u+006C u+006F u+002D U+0041 u+006E u+006F
865        u+0074 u+0068 u+0065 u+0072 u+002D U+0057 u+0061 u+0079 u+002D
866        u+305D u+308C u+305E u+308C u+306E u+5834 u+6240
867        Punycode: Hello-Another-Way--fc4qua05auwb3674vfr0b
870        u+3072 u+3068 u+3064 u+5C4B u+6839 u+306E u+4E0B u+0032
871        Punycode: 2-u9tlzr9756bt3uc0v
874        U+004D u+0061 u+006A u+0069 u+3067 U+004B u+006F u+0069 u+3059
875        u+308B u+0035 u+79D2 u+524D
876        Punycode: MajiKoi5-783gue6qz075azm5e
879        u+30D1 u+30D5 u+30A3 u+30FC u+0064 u+0065 u+30EB u+30F3 u+30D0
880        Punycode: de-jg4avhby1noc0d
883        u+305D u+306E u+30B9 u+30D4 u+30FC u+30C9 u+3067
890    (S) -> $1.00 <-
891        u+002D u+003E u+0020 u+0024 u+0031 u+002E u+0030 u+0030 u+0020
892        u+003C u+002D
893        Punycode: -> $1.00 <--
962    input is "3B-ww4c5e180e575a65lsy2b"
963    literal portion is "3B-", so extended string starts as:
1076    basic code points (0033, 0042) are copied to literal portion: "3B-"
1095    output is "3B-ww4c5e180e575a65lsy2b"
1139    [RFC1034]    Mockapetris, P., "Domain Names - Concepts and
1183 A. Mixed-case annotation
1185    In order to use Punycode to represent case-insensitive strings,
1186    higher layers need to case-fold the strings prior to Punycode
1188    telling how to convert the folded string into a mixed-case string for
1189    display purposes.  Note, however, that mixed-case annotation is not
1195    leaving uppercase code points uppercase.  Each non-basic code point
1198    is uppercase, it is a suggestion to map the non-basic code point to
1200    map the non-basic code point to lowercase (if possible).
1264 #if UINT_MAX >= (1 << 26) - 1
1281     /* output string is *not* null-terminated; it will contain        */
1364 /* base-1, or base if cp does not represent a value.          */
1368   return  cp - 48 < 10 ? cp - 22 :  cp - 65 < 26 ? cp - 65 :
1369           cp - 97 < 26 ? cp - 97 :  base;
1374 /* the range 0 to base-1.  The lowercase form is used unless flag is  */
1380   return d + 22 + 75 * (d < 26) - ((flag != 0) << 5);
1389 #define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
1407   bcp -= (bcp - 97 < 26) << 5;
1408   return bcp + ((!flag && (bcp - 65 < 26)) << 5);
1411 /*** Platform-specific constants ***/
1414 static const punycode_uint maxint = -1;
1415 /* Because maxint is unsigned, -1 becomes the maximum value. */
1428   for (k = 0;  delta > ((base - tmin) * tmax) / 2;  k += base) {
1429     delta /= base - tmin;
1432   return k + (base - tmin + 1) * delta / (delta + skew);
1465       if (max_out - out < 2) return punycode_big_output;
1484     /* All non-basic code points < n have been     */
1496     if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow;
1497     delta += (m - n) * (h + 1);
1507         /* Represent delta as a generalized variable-length integer: */
1520               k >= bias + tmax ? tmax : k - bias;
1522           output[out++] = encode_digit(t + (q - t) % (base - t), 0);
1523           q = (q - t) / (base - t);
1588     /* Decode a generalized variable-length integer into delta,  */
1597       if (digit > (maxint - i) / w) return punycode_overflow;
1600           k >= bias + tmax ? tmax : k - bias;
1602       if (w > maxint / (base - t)) return punycode_overflow;
1603       w *= (base - t);
1606     bias = adapt(i - oldi, out + 1, oldi == 0);
1611     if (i / (out + 1) > maxint - n) return punycode_overflow;
1622       memmove(case_flags + i + 1, case_flags + i, out - i);
1632       case_flags[i] = flagged(input[in - 1]);
1635     memmove(output + i + 1, output + i, (out - i) * sizeof *output);
1651 /* For testing, we'll just set some compile-time limits rather than */
1652 /* use malloc(), and set a compile-time option rather than using a  */
1653 /* command-line option.                                             */
1664     "%s -e reads code points and writes a Punycode string.\n"
1665     "%s -d reads a Punycode string and writes code points.\n"
1668     "Code points are in the form u+hex separated by whitespace.\n"
1673     "The case of the u in u+hex is the force-to-uppercase flag.\n"
1704   " !\"#$%&'()*+,-./"
1719   if (argv[1][0] != '-') usage(argv);
1745       if (r != 2 || uplus[1] != '+' || codept > (punycode_uint)-1) {
1751       if (uplus[0] == 'u') case_flags[input_length] = 0;
1752       else if (uplus[0] == 'U') case_flags[input_length] = 1;
1800     input_length = strlen(input) - 1;
1807       *p = pp - print_ascii;
1824                  case_flags[j] ? "U" : "u",