# Module: gmisclib.g_encode

# -*- coding: utf-8 -*-
"""This module allows strings to be encoded into a
reduced subset. It is designed to work for avio.py,
and to do a minimal mapping, so that the resulting
text is human-readable. It is similar to Quoted-printable
encoding, but is not specialized to e-mail limitations and
is rather more flexible.
"""


import re
import string


__version__ = "$Revision: 1.10 $"


class BadFormatError(Exception):
    """Error type this module uses to report an illegal escape sequence."""

    def __init__(self, *args):
        # Pure pass-through: all positional arguments end up in ``self.args``.
        super(BadFormatError, self).__init__(*args)



# Value of every hexadecimal digit character, in both letter cases.
_backdict = dict((digit, int(digit, 16)) for digit in string.hexdigits)


# (code, character) pairs: the escape character followed by ``code``
# stands for ``character``.
#
# Order is significant:
#   * the decoding regular expression tries the codes in this order, so
#     'mt' has to come before 'm';
#   * when two codes share a character ('_' below), the later entry is
#     the one used for encoding; both are accepted when decoding.
_specials = [
    ('mt', ''),     # the empty string
    (' ', '_'),
    ('u', '_'),
    ('p', '.'),
    ('m', ','),
    ('s', ';'),
    ('z', '='),
    ('t', '\t'),
    ('Z', '\033'),  # ESC
    ('M', '&'),
    ('T', '%'),
    ('l', '/'),
    ('K', '\\'),
    ('k', '\b'),
    ('R', '\r'),
    ('L', '\n'),
    ('q', '"'),
    ('Q', '?'),
    ('U', "'"),
    ('S', ' '),
    ('P', '#'),
]



def _expand_bdict(b, specials=None):
    """Build the table that maps escape codes back to characters.

    Every ordered pair of keys of ``b`` is concatenated into a
    two-character code whose value is ``chr(16*hi + lo)``.  The special
    codes are then added to the same table.

    b -- dict mapping one hex-digit character to its numeric value.
    specials -- sequence of (code, character) pairs; when omitted, the
        module-level ``_specials`` table is used.

    Returns a dict keyed by escape code (without the escape character).

    Raises AssertionError if a special code is itself a key of ``b``, or
    if it duplicates a code that is already in the table.
    """
    if specials is None:
        specials = _specials
    o = {}
    for (si, ni) in b.items():
        for (sj, nj) in b.items():
            o[si + sj] = chr(16 * ni + nj)
    for (k, v) in specials:
        # Check against the table we were handed, not a module global.
        assert k not in b, "Special (%s) collides with hex." % k
        assert k not in o, "Special (%s) collides with hex or special." % k
        o[k] = v
    return o



# Decoding table built once at import: escape code (a pair of hex digits
# or one of the special codes) -> the character it stands for.
_bdict = _expand_bdict(_backdict)



def _fromhex(x):
    """Replacement callback: turn one matched escape code into its character.

    x -- match object whose group 1 holds the code that followed the
        escape character (a hex pair or one of the specials above).

    Raises KeyError if that code is not in ``_bdict``.
    """
    return _bdict[x.group(1)]



def _rm_nl(s):
    """Return ``s`` without its final newline; at most one is removed."""
    return s[:-1] if s.endswith('\n') else s



def _expand_fdict(eschar, specials=None):
    """Build the table that maps a character to its escape sequence.

    Each of the 256 characters ``chr(0)`` .. ``chr(255)`` first gets the
    generic form: ``eschar`` followed by two lowercase hex digits.
    Characters listed in the specials table then have that entry
    replaced by ``eschar`` followed by their code.  If a character is
    listed more than once, the last entry wins.

    eschar -- the string that introduces an escape sequence.
    specials -- sequence of (code, character) pairs; when omitted, the
        module-level ``_specials`` table is used.

    Returns a dict keyed by the unescaped character.  The empty string
    is also a key if the specials table has an entry for it.
    """
    if specials is None:
        specials = _specials
    o = {}
    for c in range(256):
        o[chr(c)] = '%s%02x' % (eschar, c)
    for (k, v) in specials:
        o[v] = '%s%s' % (eschar, k)
    return o



class encoder(object):
    """Reversible escaping of strings into a restricted character set.

    ``fwd`` replaces each character that is not acceptable with an
    escape sequence: the escape character followed either by two hex
    digits or by one of the codes in ``_specials``.  ``back`` undoes
    this.

    Attributes:
        ref    -- compiled pattern matching what ``fwd`` must escape.
        empty  -- the encoding of the empty string (escape character
                  followed by 'mt').
        _reb   -- compiled pattern matching one escape sequence.
        _fdict -- character -> escape sequence table from
                  ``_expand_fdict``.

    NOTE(review): the code 'mt' (empty string) begins with the code 'm'
    (comma).  An encoder that escapes ',' but leaves 't' alone turns
    ',t' into '<eschar>mt', which ``back`` decodes as the empty string.
    The default character set allows ',', so the default encoder is
    not affected.
    """

    def __init__(self, allowed=None, notallowed=None, regex=None, eschar='%'):
        """Set up the patterns and tables for one escaping scheme.

        At most one of ``allowed``, ``notallowed`` and ``regex`` may be
        given.  With none of them, ASCII letters, digits, the space and
        a fixed set of punctuation are left alone.

        allowed -- characters that need no escaping.  The string is
            spliced into a ``[^...]`` character class, so ranges such
            as '0-9a-z' work and regex-special characters need care.
            It must not contain ``eschar``.
        notallowed -- characters that must be escaped.  The string is
            spliced into a ``[...]`` character class, with the same
            caveats.  It must contain ``eschar``.
        regex -- a complete pattern for the text to be escaped; it is
            compiled as given.
        eschar -- the string that introduces an escape sequence.

        Apart from ``regex``, a whitespace character at the very start
        or end of the string is always escaped.

        Note that there are some twiddly points in defining
        encoders -- the notallowed and allowed arguments
        need to be thought through carefully, as they are
        passed into the re module as part of a regular
        expression.  Certain characters may give surprising
        results.

        Raises AssertionError if the arguments break the rules above.
        """
        assert (regex is not None) + (allowed is not None) + (notallowed is not None) <= 1, \
            "Specify at most one of regex, allowed, notallowed."
        if notallowed is not None:
            assert eschar in notallowed, \
                "Sorry: notallowed must contain '%s', but it is '%s'." % (eschar, notallowed)
            self.ref = re.compile(r'(^\s)|([%s])|(\s$)' % notallowed)
        elif regex is not None:
            self.ref = re.compile(regex)
        else:
            if allowed is None:
                # ascii_letters, not the locale-dependent (and, in
                # Python 3, missing) string.letters: the encoding must
                # not change with the locale.
                allowed = string.ascii_letters + string.digits + \
                    r"""_!@$^&*()+={}[\]\|:'"?/>.<,\ ~`-"""
            assert eschar not in allowed, "Cannot allow '%s'." % eschar
            self.ref = re.compile(r'(^\s)|([^%s])|(\s$)' % allowed)

        # The escape character and the codes are literal text here, so
        # quote them: an escape character such as '.' or '+' would
        # otherwise be read as a regex operator.  The codes keep the
        # order of _specials, so that 'mt' is tried before 'm'.
        codes = '|'.join([re.escape(code) for (code, _char) in _specials])
        self._reb = re.compile('%s([0-9a-fA-F][0-9a-fA-F]|%s)'
                               % (re.escape(eschar), codes))
        self._fdict = _expand_fdict(eschar)
        self.empty = '%smt' % eschar

    def back(self, x):
        """Converts back from a string containing %xx escape sequences to
        an unencoded string.

        Text that does not form a recognised escape sequence is copied
        through unchanged.

        Raises BadFormatError if looking up a matched code fails.
        """
        try:
            return self._reb.sub(_fromhex, x)
        except KeyError as err:
            raise BadFormatError("illegal escape sequence: %s" % err)

    def _tohex(self, x):
        """Converts the text of a MatchObject to %xx escape sequences.

        Every matched character is escaped.  The built-in patterns
        match one character at a time; a user-supplied ``regex`` may
        match several, and none of them may be dropped.
        """
        return ''.join([self._fdict[c] for c in x.group(0)])

    def fwd(self, x):
        """Escapes a string so it is suitable for a=v; form.

        Characters matched by ``self.ref`` are converted to escape
        sequences (two hex digits, or one of the special codes).  With
        the default settings that covers nonprinting characters and
        [;#].  The empty string is encoded as ``self.empty``.

        Non-strings will be converted to strings with repr(),
        and can be fed back into the python interpreter.
        """
        if not isinstance(x, str):
            x = repr(x)
        if x == '':
            return self.empty
        return self.ref.sub(self._tohex, x)



def test():
    """Self-test: round trips and known encodings for several encoders."""
    # Default encoder.
    default = encoder()
    for text in ('george',
                 'hello there',
                 '%hello',
                 ' hello there',
                 ' hello there\t',
                 ' hello there\t=',
                 ' hello there\t=;#',
                 ' hello+_there\t=;#',
                 'hello+_there\t=;#'):
        assert default.back(default.fwd(text)) == text
    for (coded, plain) in (('%sfoo', ';foo'),
                           ('%Sfoo%S%P', ' foo #'),
                           ('%Tfoo', '%foo')):
        assert default.back(coded) == plain
    assert default.fwd('hello there') == 'hello there'

    # Only 'a'..'d' are allowed through unescaped.
    narrow = encoder('abcd')
    for (plain, coded) in (("cab d", 'cab%Sd'),
                           ("e", '%65'),
                           ("aaaa bbbb", 'aaaa%Sbbbb')):
        assert narrow.fwd(plain) == coded

    # Explicit list of forbidden characters.
    banned = encoder(notallowed = ']\n\r%')
    assert '\n' not in banned.fwd('hello world\n\r')
    assert ']' not in banned.fwd('hello]% world\n\r')
    assert banned.back(banned.fwd('hello world\n\r'))=='hello world\n\r'

    # A different escape character.
    under = encoder(eschar='_', allowed='0-9a-zA-Z')
    assert under.back('_sfoo') == ';foo'
    for text in ('%hello', '_hello'):
        assert under.back(under.fwd(text)) == text



# Run the self-test when executed as a script.
if __name__ == '__main__':
    test()
    # Call form: prints the same text under Python 2 and Python 3.
    print("OK: passed tests")

# Source code for module gmisclib.g_encode