Package gmisclib :: Module g_encode
[frames] | no frames]

Source Code for Module gmisclib.g_encode

  1  # -*- coding: utf-8 -*- 
  2  """This module allows strings to be encoded into a 
  3  reduced subset.   It is designed to work for avio.py, 
  4  and to do a minimal mapping, so that the resulting 
  5  text is human-readable.   It is similar to Quoted-printable 
  6  encoding, but is not specialized to e-mail limitations and 
  7  is rather more flexible. 
  8  """ 
  9   
 10   
 11  import re 
 12  import string; 
 13   
 14   
 15  __version__ = "$Revision: 1.10 $" 
 16   
 17   
class BadFormatError(Exception):
    """Error raised by the decoder for an escape sequence it cannot expand."""

    def __init__(self, *args):
        # Plain pass-through: whatever is supplied ends up in ``self.args``.
        super(BadFormatError, self).__init__(*args)
21 22 23 _backdict = {'0':0, '1':1, '2':2, '3':3, '4':4, '5':5, 24 '6':6, '7':7, '8':8, '9':9, 25 'a':10, 'A':10, 'b':11, 'B':11, 26 'c':12, 'C':12, 'd':13, 'D':13, 27 'e':14, 'E':14, 'f':15, 'F':15 } 28 29 30 _specials = [ ('mt', ''), 31 (' ', '_'), 32 ('u', '_'), 33 ('p', '.'), 34 ('m', ','), 35 ('s', ';'), 36 ('z', '='), 37 ('t', '\t'), 38 ('Z', '\033'), 39 ('M', '&'), 40 ('T', '%'), 41 ('l', '/'), 42 ('K', '\\'), 43 ('k', '\b'), 44 ('R', '\r'), 45 ('L', '\n'), 46 ('q', '"'), 47 ('Q', '?'), 48 ('U', "'"), 49 ('S', ' '), 50 ('P', '#') 51 ] 52 53 54 55
56 -def _expand_bdict(b):
57 o = {} 58 for (si, ni) in b.items(): 59 for (sj, nj) in b.items(): 60 o[si + sj] = chr(16*ni + nj) 61 for (k, v) in _specials: 62 assert not _backdict.has_key(k), "Special (%s) collides with hex." % k 63 assert not o.has_key(k), "Special (%s) collides with hex or special." % k 64 o[k] = v 65 return o
66 67 68 _bdict = _expand_bdict(_backdict) 69 70
def _fromhex(x):
    """Substitution callback for the decoder.

    C{x} is a match object whose first group holds an escape code (two
    hex digits or one of the special codes); the text that code stands
    for is looked up in C{_bdict} and returned.  A code missing from the
    table raises KeyError.
    """
    return _bdict[x.group(1)]
75 76 77
78 -def _rm_nl(s):
79 if s.endswith('\n'): 80 return s[:-1] 81 return s
82 83 84
def _expand_fdict(eschar):
    """Build the encoding table for the escape character C{eschar}.

    Every character with code 0..255 first gets the two-digit lowercase
    hexadecimal escape.  The entries of C{_specials} are then applied in
    list order, keyed by their decoded text, so a mnemonic escape
    replaces the hexadecimal one and, where several codes share the
    same text, the code listed last is the one kept.
    """
    table = dict((chr(code), '%s%02x' % (eschar, code)) for code in range(256))
    table.update((text, '%s%s' % (eschar, code)) for (code, text) in _specials)
    return table
92 93 94 95 96 97
class encoder:
    """Reversible escaping of strings into a restricted set of characters.

    L{fwd} replaces every character selected by the compiled pattern
    C{ref} with the escape character followed by either a mnemonic code
    from C{_specials} or two hexadecimal digits; L{back} expands such
    escapes again.

    Attributes:
      - C{ref}: compiled pattern selecting what L{fwd} escapes.
      - C{empty}: the encoded form of the empty string.
    """

    def __init__(self, allowed=None, notallowed=None, regex=None, eschar='%'):
        """
        Note that there are some twiddly points in defining
        encoders -- the notallowed and allowed arguments
        need to be thought through carefully, as they are
        passed into the re module as part of a regular
        expression.   Certain characters may give surprising
        results.

        At most one of C{allowed}, C{notallowed} and C{regex} may be given.

        @param allowed: text placed inside a negated character class;
            every character outside that class is escaped.  It must not
            contain C{eschar}.  When none of the three selectors is
            given, ASCII letters, digits and a fixed set of punctuation
            (including the space) are allowed.
        @param notallowed: text placed inside a character class; every
            character in that class is escaped.  It must contain
            C{eschar}.
        @param regex: a complete pattern; each match is replaced by the
            escape for the first character of the match.
        @param eschar: the text that introduces an escape sequence.
        @raise AssertionError: if more than one selector is given, or
            if the C{eschar} requirement above is violated.

        With C{allowed} or C{notallowed}, a whitespace character at the
        very start or end of the string is escaped as well.
        """
        given = sum([arg is not None for arg in (regex, allowed, notallowed)])
        assert given <= 1, "Specify at most one of regex, allowed, notallowed."
        if notallowed is not None:
            assert eschar in notallowed, \
                "Sorry: notallowed must contain '%s', but it is '%s'." % (eschar, notallowed)
            self.ref = re.compile(r'(^\s)|([%s])|(\s$)' % notallowed)
        elif regex is not None:
            self.ref = re.compile(regex)
        else:
            if allowed is None:
                # ascii_letters instead of string.letters: the latter is
                # locale-dependent and does not exist in Python 3, and
                # the encoded form should not vary with the locale.
                allowed = string.ascii_letters + string.digits + \
                    r"""_!@$^&*()+={}[\]\|:'"?/>.<,\ ~`-"""
            assert eschar not in allowed, "Cannot allow '%s'." % eschar
            self.ref = re.compile(r'(^\s)|([^%s])|(\s$)' % allowed)

        # The escape character and the codes are literal text, so they
        # are regex-escaped; otherwise an eschar such as '.' or '$'
        # would be interpreted as a metacharacter when decoding.
        codes = '|'.join([re.escape(_c[0]) for _c in _specials])
        self._reb = re.compile('%s([0-9a-fA-F][0-9a-fA-F]|%s)'
                               % (re.escape(eschar), codes))
        self._fdict = _expand_fdict(eschar)
        self._eschar = eschar
        self.empty = '%smt' % eschar

    def back(self, x):
        """Converts back from a string containing %xx escape sequences to
        an unencoded string.

        Text that does not form a recognised escape sequence is returned
        unchanged.

        @raise BadFormatError: if a matched escape code has no entry in
            the decoding table.
        """
        try:
            return self._reb.sub(_fromhex, x)
        except KeyError as err:
            raise BadFormatError("illegal escape sequence: %s" % err)

    def _tohex(self, x):
        """Converts a single character in a MatchObject to a %xx escape sequence.

        Only the first character of the match is encoded.
        """
        q = x.string[x.start()]
        assert len(q) == 1, 'tohex operates on a single character'
        code = self._fdict[q]
        # A mnemonic escape followed by literal text must not read back
        # as the code for the empty string: '<eschar>m' (a comma) in
        # front of a 't' would decode as '<eschar>mt' and lose both
        # characters.  The hexadecimal form decodes to the same
        # character and is unambiguous, so it is used in that position.
        if code + x.string[x.end():x.end() + 1] == self.empty:
            return '%s%02x' % (self._eschar, ord(q))
        return code

    def fwd(self, x):
        """Escapes a string so that it contains only permitted characters.

        Every match of C{self.ref} is replaced by an escape sequence;
        the empty string is encoded as C{self.empty}.
        Non-strings will be converted to strings with repr()
        before they are escaped.
        """
        if not isinstance(x, str):
            x = repr(x)
        if x == '':
            return self.empty
        return self.ref.sub(self._tohex, x)
156 157 158 159
def test():
    """Self-test: exercises encoding, decoding and round trips for
    several encoder configurations.  Raises AssertionError on the first
    check that fails.
    """
    def survives(codec, text):
        # True when decoding the encoded text gives the text back.
        return codec.back(codec.fwd(text)) == text

    # Default encoder.
    plain = encoder()
    for sample in ('george', 'hello there'):
        assert survives(plain, sample)
    for (encoded, decoded) in (('%sfoo', ';foo'),
                               ('%Sfoo%S%P', ' foo #'),
                               ('%Tfoo', '%foo')):
        assert plain.back(encoded) == decoded
    for sample in ('%hello',
                   ' hello there',
                   ' hello there\t',
                   ' hello there\t=',
                   ' hello there\t=;#',
                   ' hello+_there\t=;#',
                   'hello+_there\t=;#'):
        assert survives(plain, sample)
    assert plain.fwd('hello there') == 'hello there'

    # Only the characters a, b, c and d are allowed.
    narrow = encoder('abcd')
    for (raw, encoded) in (("cab d", 'cab%Sd'),
                           ("e", '%65'),
                           ("aaaa bbbb", 'aaaa%Sbbbb')):
        assert narrow.fwd(raw) == encoded

    # Explicit list of characters that must be escaped.
    banned = encoder(notallowed=']\n\r%')
    assert '\n' not in banned.fwd('hello world\n\r')
    assert ']' not in banned.fwd('hello]% world\n\r')
    assert survives(banned, 'hello world\n\r')

    # A different escape character.
    under = encoder(eschar='_', allowed='0-9a-zA-Z')
    assert under.back('_sfoo') == ';foo'
    for sample in ('%hello', '_hello'):
        assert survives(under, sample)
# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    test()
    # Function-call form: prints the same text under Python 2 and 3.
    print("OK: passed tests")