Home | History | Annotate | Line # | Download | only in wind
rfc4518.py revision 1.1.1.2
      1      1.1  elric #!/usr/local/bin/python
      2      1.1  elric # -*- coding: iso-8859-1 -*-
      3      1.1  elric 
      4  1.1.1.2  elric # Id
      5      1.1  elric 
      6      1.1  elric # Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
      7      1.1  elric # (Royal Institute of Technology, Stockholm, Sweden). 
      8      1.1  elric # All rights reserved. 
      9      1.1  elric # 
     10      1.1  elric # Redistribution and use in source and binary forms, with or without 
     11      1.1  elric # modification, are permitted provided that the following conditions 
     12      1.1  elric # are met: 
     13      1.1  elric # 
     14      1.1  elric # 1. Redistributions of source code must retain the above copyright 
     15      1.1  elric #    notice, this list of conditions and the following disclaimer. 
     16      1.1  elric # 
     17      1.1  elric # 2. Redistributions in binary form must reproduce the above copyright 
     18      1.1  elric #    notice, this list of conditions and the following disclaimer in the 
     19      1.1  elric #    documentation and/or other materials provided with the distribution. 
     20      1.1  elric # 
     21      1.1  elric # 3. Neither the name of the Institute nor the names of its contributors 
     22      1.1  elric #    may be used to endorse or promote products derived from this software 
     23      1.1  elric #    without specific prior written permission. 
     24      1.1  elric # 
     25      1.1  elric # THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND 
     26      1.1  elric # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
     27      1.1  elric # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
     28      1.1  elric # ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE 
     29      1.1  elric # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
     30      1.1  elric # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
     31      1.1  elric # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
     32      1.1  elric # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
     33      1.1  elric # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
     34      1.1  elric # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
     35      1.1  elric # SUCH DAMAGE. 
     36      1.1  elric 
     37      1.1  elric import re
     38      1.1  elric import string
     39      1.1  elric 
def read():
    """Return the RFC 4518 string-preparation tables as a dict.

    The result has two keys:

    "rfc4518-map"   -- list of mapping entries (section 2.2, Map)
    "rfc4518-error" -- list of prohibited code points (section 2.4, Prohibit)

    Every entry is a string of ';'-separated fields: a code point or an
    inclusive "FIRST-LAST" range in hexadecimal, then the replacement
    code point (left empty when the character maps to nothing), then a
    free-text note.  The strings are returned exactly as written here,
    including their irregular spacing around ';'.

    A fresh dict holding fresh lists is built on every call.
    """

    mapping = [
        # 2.2.  Map
        #
        #   SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806)
        #   code points are mapped to nothing.  COMBINING GRAPHEME JOINER
        #   (U+034F) and VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code
        #   points are also mapped to nothing.  The OBJECT REPLACEMENT
        #   CHARACTER (U+FFFC) is mapped to nothing.
        #
        #   NOTE: the text this table was transcribed from gives the second
        #   variation-selector range as "FF00-FE0F"; the entries below
        #   cover FE00 through FE0F.
        " 00AD; ; Map to nothing",
        " 1806; ; Map to nothing",
        " 034F; ; Map to nothing",

        " 180B; ; Map to nothing",
        " 180C; ; Map to nothing",
        " 180D; ; Map to nothing",

        " FE00; ; Map to nothing",
        " FE01; ; Map to nothing",
        " FE02; ; Map to nothing",
        " FE03; ; Map to nothing",
        " FE04; ; Map to nothing",
        " FE05; ; Map to nothing",
        " FE06; ; Map to nothing",
        " FE07; ; Map to nothing",
        " FE08; ; Map to nothing",
        " FE09; ; Map to nothing",
        " FE0A; ; Map to nothing",
        " FE0B; ; Map to nothing",
        " FE0C; ; Map to nothing",
        " FE0D; ; Map to nothing",
        " FE0E; ; Map to nothing",
        " FE0F; ; Map to nothing",

        " FFFC; ; Map to nothing",

        #   CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        #   TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN
        #   (CR) (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to
        #   SPACE (U+0020).
        " 0009; 0020 ; Map to SPACE",
        " 000A; 0020 ; Map to SPACE",
        " 000B; 0020 ; Map to SPACE",
        " 000C; 0020 ; Map to SPACE",
        " 000D; 0020 ; Map to SPACE",
        " 0085; 0020 ; Map to SPACE",

        #   All other control code (e.g., Cc) points or code points with a
        #   control function (e.g., Cf) are mapped to nothing.  The
        #   following is a complete list of these code points:
        #   U+0000-0008, 000E-001F, 007F-0084, 0086-009F, 06DD, 070F, 180E,
        #   200C-200F, 202A-202E, 2060-2063, 206A-206F, FEFF, FFF9-FFFB,
        #   1D173-1D17A, E0001, E0020-E007F.
        " 0000-0008; ; Map to nothing",
        " 000E-001F; ; Map to nothing",
        " 007F-0084; ; Map to nothing",
        " 0086-009F; ; Map to nothing",
        " 06DD; ; Map to nothing",
        " 070F; ; Map to nothing",
        " 180E; ; Map to nothing",
        " 200C-200F; ; Map to nothing",
        " 202A-202E; ; Map to nothing",
        " 2060-2063; ; Map to nothing",
        " 206A-206F; ; Map to nothing",
        " FEFF; ; Map to nothing",
        " FFF9-FFFB; ; Map to nothing",
        " 1D173-1D17A; ; Map to nothing",
        " E0001; ; Map to nothing",
        " E0020-E007F; ; Map to nothing",

        #   ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
        #   points with Separator (space, line, or paragraph) property
        #   (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020).  The
        #   following is a complete list of these code points: U+0020,
        #   00A0, 1680, 2000-200A, 2028-2029, 202F, 205F, 3000.
        " 200B; ; Map to nothing",
        " 0020; 0020; Map to SPACE",
        " 00A0; 0020; Map to SPACE",
        " 1680; 0020; Map to SPACE",
        " 2000-200A; 0020; Map to SPACE",
        " 2028-2029; 0020; Map to SPACE",
        " 202F; 0020; Map to SPACE",
        " 205F; 0020; Map to SPACE",
        " 3000; 0020; Map to SPACE",
    ]

    #   For case ignore, numeric, and stored prefix string matching rules,
    #   characters are case folded per B.2 of [RFC3454].  (No table is
    #   produced for that step here.)

    prohibited = [
        # 2.4.  Prohibit
        #
        #   The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
        " FFFD;",
    ]

    return {
        "rfc4518-map": mapping,
        "rfc4518-error": prohibited,
    }
    151