rfc4518.py revision 1.1.1.1 1 1.1 elric #!/usr/local/bin/python
2 1.1 elric # -*- coding: iso-8859-1 -*-
3 1.1 elric
4 1.1 elric # $Id: rfc4518.py,v 1.1.1.1 2011/04/13 18:15:59 elric Exp $
5 1.1 elric
6 1.1 elric # Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
7 1.1 elric # (Royal Institute of Technology, Stockholm, Sweden).
8 1.1 elric # All rights reserved.
9 1.1 elric #
10 1.1 elric # Redistribution and use in source and binary forms, with or without
11 1.1 elric # modification, are permitted provided that the following conditions
12 1.1 elric # are met:
13 1.1 elric #
14 1.1 elric # 1. Redistributions of source code must retain the above copyright
15 1.1 elric # notice, this list of conditions and the following disclaimer.
16 1.1 elric #
17 1.1 elric # 2. Redistributions in binary form must reproduce the above copyright
18 1.1 elric # notice, this list of conditions and the following disclaimer in the
19 1.1 elric # documentation and/or other materials provided with the distribution.
20 1.1 elric #
21 1.1 elric # 3. Neither the name of the Institute nor the names of its contributors
22 1.1 elric # may be used to endorse or promote products derived from this software
23 1.1 elric # without specific prior written permission.
24 1.1 elric #
25 1.1 elric # THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
26 1.1 elric # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 1.1 elric # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 1.1 elric # ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
29 1.1 elric # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 1.1 elric # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 1.1 elric # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 1.1 elric # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 1.1 elric # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 1.1 elric # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 1.1 elric # SUCH DAMAGE.
36 1.1 elric
37 1.1 elric import re
38 1.1 elric import string
39 1.1 elric
def read():
    """Return the RFC 4518 string-preparation tables as a dict.

    The result has two keys:

    "rfc4518-map"
        List of mapping entries, one string per entry, in the form
        " <code point or range>; <replacement>; <comment>".  An empty
        replacement field means the code point is mapped to nothing.

    "rfc4518-error"
        List of prohibited code points, in the form " <code point>;".

    A new dict holding new lists is built on every call, so callers
    are free to modify the result.
    """

    # 2.2. Map
    #
    # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
    # points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
    # VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also
    # mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
    # mapped to nothing.
    #
    # NOTE: the range "FF00-FE0F" quoted above is descending as printed;
    # the entries below cover FE00 through FE0F.
    mapping = [
        " 00AD; ; Map to nothing",
        " 1806; ; Map to nothing",
        " 034F; ; Map to nothing",

        " 180B; ; Map to nothing",
        " 180C; ; Map to nothing",
        " 180D; ; Map to nothing",

        " FE00; ; Map to nothing",
        " FE01; ; Map to nothing",
        " FE02; ; Map to nothing",
        " FE03; ; Map to nothing",
        " FE04; ; Map to nothing",
        " FE05; ; Map to nothing",
        " FE06; ; Map to nothing",
        " FE07; ; Map to nothing",
        " FE08; ; Map to nothing",
        " FE09; ; Map to nothing",
        " FE0A; ; Map to nothing",
        " FE0B; ; Map to nothing",
        " FE0C; ; Map to nothing",
        " FE0D; ; Map to nothing",
        " FE0E; ; Map to nothing",
        " FE0F; ; Map to nothing",

        " FFFC; ; Map to nothing",

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        " 0009; 0020 ; Map to SPACE",
        " 000A; 0020 ; Map to SPACE",
        " 000B; 0020 ; Map to SPACE",
        " 000C; 0020 ; Map to SPACE",
        " 000D; 0020 ; Map to SPACE",
        " 0085; 0020 ; Map to SPACE",

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing.  The following
        # is a complete list of these code points: U+0000-0008, 000E-001F,
        # 007F-0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E,
        # 2060-2063, 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001,
        # E0020-E007F.
        " 0000-0008; ; Map to nothing",
        " 000E-001F; ; Map to nothing",
        " 007F-0084; ; Map to nothing",
        " 0086-009F; ; Map to nothing",
        " 06DD; ; Map to nothing",
        " 070F; ; Map to nothing",
        " 180E; ; Map to nothing",
        " 200C-200F; ; Map to nothing",
        " 202A-202E; ; Map to nothing",
        " 2060-2063; ; Map to nothing",
        " 206A-206F; ; Map to nothing",
        " FEFF; ; Map to nothing",
        " FFF9-FFFB; ; Map to nothing",
        " 1D173-1D17A; ; Map to nothing",
        " E0001; ; Map to nothing",
        " E0020-E007F; ; Map to nothing",

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
        # points with Separator (space, line, or paragraph) property (e.g.,
        # Zs, Zl, or Zp) are mapped to SPACE (U+0020).  The following is a
        # complete list of these code points: U+0020, 00A0, 1680, 2000-200A,
        # 2028-2029, 202F, 205F, 3000.
        " 200B; ; Map to nothing",
        " 0020; 0020; Map to SPACE",
        " 00A0; 0020; Map to SPACE",
        " 1680; 0020; Map to SPACE",
        " 2000-200A; 0020; Map to SPACE",
        " 2028-2029; 0020; Map to SPACE",
        " 202F; 0020; Map to SPACE",
        " 205F; 0020; Map to SPACE",
        " 3000; 0020; Map to SPACE",
    ]

    # For case ignore, numeric, and stored prefix string matching rules,
    # characters are case folded per B.2 of [RFC3454].  No table is
    # produced here for that step.

    # 2.4. Prohibit
    #
    # The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
    prohibited = [
        " FFFD;",
    ]

    ret = {}
    ret["rfc4518-map"] = mapping
    ret["rfc4518-error"] = prohibited
    return ret
151