#!/usr/local/bin/python
# -*- coding: iso-8859-1 -*-

# $Id$

# Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
# (Royal Institute of Technology, Stockholm, Sweden).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Institute nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
# IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

import re
import string


def read():
    """Return the RFC 4518 string-preparation tables.

    The result is a dict with two keys, each holding a freshly built
    list of table lines (so callers may mutate what they get back):

    "rfc4518-map"
        Lines shaped like " <code point or range>; <replacement>; <note>"
        for the Map step (RFC 4518 section 2.2).  An empty replacement
        field means the code point is mapped to nothing.
    "rfc4518-error"
        Lines shaped like " <code point>;" for the Prohibit step
        (RFC 4518 section 2.4).

    Code points are written as upper-case hexadecimal without a "U+"
    prefix; ranges are written "FIRST-LAST".
    """

    # 2.2.  Map
    #
    #   SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
    #   points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
    #   VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also
    #   mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
    #   mapped to nothing.
    #
    # NOTE: "FF00-FE0F" in the quoted text is a descending range and does
    # not match the entries below, which list FE00 through FE0F one by one.
    mapping = [
        " 00AD; ; Map to nothing",
        " 1806; ; Map to nothing",
        " 034F; ; Map to nothing",

        " 180B; ; Map to nothing",
        " 180C; ; Map to nothing",
        " 180D; ; Map to nothing",

        " FE00; ; Map to nothing",
        " FE01; ; Map to nothing",
        " FE02; ; Map to nothing",
        " FE03; ; Map to nothing",
        " FE04; ; Map to nothing",
        " FE05; ; Map to nothing",
        " FE06; ; Map to nothing",
        " FE07; ; Map to nothing",
        " FE08; ; Map to nothing",
        " FE09; ; Map to nothing",
        " FE0A; ; Map to nothing",
        " FE0B; ; Map to nothing",
        " FE0C; ; Map to nothing",
        " FE0D; ; Map to nothing",
        " FE0E; ; Map to nothing",
        " FE0F; ; Map to nothing",

        " FFFC; ; Map to nothing",

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        " 0009; 0020 ; Map to SPACE",
        " 000A; 0020 ; Map to SPACE",
        " 000B; 0020 ; Map to SPACE",
        " 000C; 0020 ; Map to SPACE",
        " 000D; 0020 ; Map to SPACE",
        " 0085; 0020 ; Map to SPACE",

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing.  The following
        # is a complete list of these code points: U+0000-0008, 000E-001F,
        # 007F-0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E,
        # 2060-2063, 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001,
        # E0020-E007F.
        " 0000-0008; ; Map to nothing",
        " 000E-001F; ; Map to nothing",
        " 007F-0084; ; Map to nothing",
        " 0086-009F; ; Map to nothing",
        " 06DD; ; Map to nothing",
        " 070F; ; Map to nothing",
        " 180E; ; Map to nothing",
        " 200C-200F; ; Map to nothing",
        " 202A-202E; ; Map to nothing",
        " 2060-2063; ; Map to nothing",
        " 206A-206F; ; Map to nothing",
        " FEFF; ; Map to nothing",
        " FFF9-FFFB; ; Map to nothing",
        " 1D173-1D17A; ; Map to nothing",
        " E0001; ; Map to nothing",
        " E0020-E007F; ; Map to nothing",

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
        # points with Separator (space, line, or paragraph) property (e.g.,
        # Zs, Zl, or Zp) are mapped to SPACE (U+0020).  The following is a
        # complete list of these code points: U+0020, 00A0, 1680, 2000-200A,
        # 2028-2029, 202F, 205F, 3000.
        " 200B; ; Map to nothing",
        " 0020; 0020; Map to SPACE",
        " 00A0; 0020; Map to SPACE",
        " 1680; 0020; Map to SPACE",
        " 2000-200A; 0020; Map to SPACE",
        " 2028-2029; 0020; Map to SPACE",
        " 202F; 0020; Map to SPACE",
        " 205F; 0020; Map to SPACE",
        " 3000; 0020; Map to SPACE",
    ]

    # For case ignore, numeric, and stored prefix string matching rules,
    # characters are case folded per B.2 of [RFC3454].  No table is
    # emitted for that step here.

    # 2.4.  Prohibit
    #
    #   The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
    prohibited = [
        " FFFD;",
    ]

    return {
        "rfc4518-map": mapping,
        "rfc4518-error": prohibited,
    }