1
2 """This module allows strings to be encoded into a
3 reduced subset. It is designed to work for avio.py,
4 and to do a minimal mapping, so that the resulting
5 text is human-readable. It is similar to Quoted-printable
6 encoding, but is not specialized to e-mail limitations and
7 is rather more flexible.
8 """
9
10
11 import re
12 import string;
13
14
# Revision string of this module (an RCS/CVS keyword expansion).
__version__ = "$Revision: 1.10 $"
16
17
21
22
# Value of every single hexadecimal digit; both letter cases are accepted.
_backdict = {
    '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5,
    '6': 6, '7': 7, '8': 8, '9': 9,
    'a': 10, 'A': 10, 'b': 11, 'B': 11,
    'c': 12, 'C': 12, 'd': 13, 'D': 13,
    'e': 14, 'E': 14, 'f': 15, 'F': 15,
}
28
29
# Mnemonic escape codes, as (code, character) pairs.  A code written after the
# escape character decodes to the paired character.
#  * Order matters: the codes are joined, in this order, into a regular
#    expression alternation, so 'mt' has to stay ahead of 'm'.
#  * Two codes (' ' and 'u') decode to '_'.  When the encoding table is built
#    from this list the later entry overwrites the earlier one, so '_' is
#    encoded with 'u'.
#  * 'mt' stands for the empty string.
_specials = [
    ('mt', ''),
    (' ', '_'),
    ('u', '_'),
    ('p', '.'),
    ('m', ','),
    ('s', ';'),
    ('z', '='),
    ('t', '\t'),
    ('Z', '\033'),
    ('M', '&'),
    ('T', '%'),
    ('l', '/'),
    ('K', '\\'),
    ('k', '\b'),
    ('R', '\r'),
    ('L', '\n'),
    ('q', '"'),
    ('Q', '?'),
    ('U', "'"),
    ('S', ' '),
    ('P', '#'),
]
52
53
54
55
def _expand_bdict(b):
    """Build the table that maps the text of an escape code to its character.

    b -- dictionary mapping each single hexadecimal digit to its value.

    Returns a dictionary that holds every two-digit combination of the keys
    of `b`, mapped to the character with that code, plus every code listed in
    `_specials`, mapped to its character.

    Asserts that no special code is spelled like a single hex digit or like
    an entry that is already in the table.
    """
    o = {}
    for (si, ni) in b.items():
        for (sj, nj) in b.items():
            o[si + sj] = chr(16*ni + nj)
    for (k, v) in _specials:
        # Checked against the digits actually passed in, not a global table.
        assert k not in b, "Special (%s) collides with hex." % k
        assert k not in o, "Special (%s) collides with hex or special." % k
        o[k] = v
    return o
66
67
# Decoding table used by _fromhex: text of an escape code -> character.
_bdict = _expand_bdict(_backdict)
69
70
def _fromhex(x):
    """Expands a %XX code (or the specials above) into a character.

    x -- a regular-expression match object whose group 1 is the text of the
         code, i.e. what follows the escape character.

    Returns the entry of `_bdict` for that text; a code that is not in the
    table raises KeyError.
    """
    return _bdict[x.group(1)]
75
76
77
# Returns `s` without its final newline when it ends with one, and `s`
# unchanged otherwise (at most one newline is removed).
# NOTE(review): the `def` line that names this helper is not present in this
# listing (its numbering skips from 77 to 79) and nothing visible here calls
# it -- restore the header from the original file rather than guessing it.
79 if s.endswith('\n'):
80 return s[:-1]
81 return s
82
83
84
def _expand_fdict(eschar):
    """Build the table that maps a character to its escape sequence.

    eschar -- the character that introduces an escape sequence.

    Every character chr(0)..chr(255) first gets `eschar` followed by two
    lowercase hexadecimal digits.  The characters listed in `_specials` are
    then given their mnemonic code instead; when two codes share a character
    the one listed later is kept.  The empty string (code 'mt') also ends up
    as a key of the result.
    """
    o = {}
    for c in range(256):
        o[chr(c)] = '%s%02x' % (eschar, c)
    for (k, v) in _specials:
        o[v] = '%s%s' % (eschar, k)
    return o
92
93
94
95
96
97
class encoder(object):
    """Reversible escaping of strings into a restricted set of characters.

    `fwd` replaces every character that the instance's pattern matches by the
    escape character followed by either a mnemonic code from `_specials` or
    two hexadecimal digits; `back` undoes this.  The empty string is written
    as the marker `self.empty`.

    NOTE(review): the `class` line is missing from the listing this block was
    recovered from.  The name comes from the `encoder(...)` calls in `test()`;
    the `object` base class is an assumption -- confirm against the original.
    """

    def __init__(self, allowed=None, notallowed=None, regex=None, eschar='%'):
        """Build the encoding and decoding patterns.

        At most one of `allowed`, `notallowed` and `regex` may be given.

        allowed -- characters that pass through unescaped, written as the
            inside of a regular-expression character class (for example
            '0-9a-zA-Z').  It must not contain `eschar`.  When none of the
            three arguments is given, the ASCII letters, the digits and the
            punctuation listed below (which includes the space) are allowed.
        notallowed -- characters that must be escaped, also written as the
            inside of a character class.  It must contain `eschar`.
        regex -- a complete pattern; every match is replaced by the escape
            sequence of its first character.
        eschar -- the character that introduces an escape sequence.

        With `allowed` or `notallowed`, a whitespace character at the very
        start or end of the string is escaped as well.

        Note that there are some twiddly points in defining
        encoders -- the notallowed and allowed arguments
        need to be thought through carefully, as they are
        passed into the re module as part of a regular
        expression.   Certain characters may give surprising
        results.
        """
        assert (regex is not None) + (allowed is not None) + (notallowed is not None) <= 1, "Specify at most one of regex, allowed, notallowed."
        if notallowed is not None:
            assert eschar in notallowed, "Sorry: notallowed must contain '%s', but it is '%s'." % (eschar, notallowed)
            self.ref = re.compile(r'(^\s)|([%s])|(\s$)' % notallowed)
        elif regex is not None:
            self.ref = re.compile(regex)
        else:
            if allowed is None:
                # ascii_letters rather than the locale-dependent (and, in
                # Python 3, absent) string.letters, so that the encoded text
                # does not depend on the locale of the writing process.
                allowed = string.ascii_letters + string.digits + \
                    r"""_!@$^&*()+={}[\]\|:'"?/>.<,\ ~`-"""
            assert eschar not in allowed, "Cannot allow '%s'." % eschar
            self.ref = re.compile(r'(^\s)|([^%s])|(\s$)' % allowed)

        # Decoder pattern: the escape character followed by two hex digits or
        # by one of the mnemonic codes.  Both are escaped so that an escape
        # character or code that is special in a regular expression is still
        # matched literally.
        codes = '|'.join([re.escape(_c[0]) for _c in _specials])
        self._reb = re.compile('%s([0-9a-fA-F][0-9a-fA-F]|%s)'
                               % (re.escape(eschar), codes))
        self._fdict = _expand_fdict(eschar)
        self._eschar = eschar
        # What the empty string is encoded as.
        self.empty = '%smt' % eschar

    def back(self, x):
        """Converts back from a string containing %xx escape sequences to
        an unencoded string.

        Text that does not form a known escape sequence is left unchanged.
        A KeyError raised while decoding is reported as BadFormatError (that
        exception class is defined outside this block).
        """
        try:
            return self._reb.sub(_fromhex, x)
        except KeyError as err:
            raise BadFormatError("illegal escape sequence: %s" % err)

    def _tohex(self, x):
        """Converts a single character in a MatchObject to a %xx escape sequence.

        The character is the one at the start of the match.  A character with
        no entry in the encoding table (which covers chr(0)..chr(255) and the
        specials) raises KeyError.
        """
        q = x.string[x.start()]
        assert len(q) == 1, 'tohex operates on a single character'
        code = self._fdict[q]
        # If this code plus the text that follows would spell the
        # empty-string marker (an escaped ',' directly before a literal 't'
        # gives '<esc>mt'), the result would decode to nothing.  Use the
        # two-digit hexadecimal form there; it decodes to the same character.
        marker = self.empty
        if (len(code) < len(marker) and marker.startswith(code)
                and x.string.startswith(marker[len(code):], x.end())):
            return '%s%02x' % (self._eschar, ord(q))
        return code

    def fwd(self, x):
        """Escapes a string so it is suitable for a=v; form.
        With the default settings, nonprinting characters, along with [;#],
        are converted to %xx escapes (hexadecimal) or to the mnemonic codes.
        Non-strings will be converted to strings with repr(),
        and can be fed back into the python interpreter.
        The empty string is returned as `self.empty`.
        """
        if not isinstance(x, str):
            x = repr(x)
        if x == '':
            return self.empty

        return self.ref.sub(self._tohex, x)
156
157
158
159
def test():
    """Self-test of the encoder class; raises AssertionError on failure.

    Exercises the default encoder, an `allowed` encoder, a `notallowed`
    encoder and an encoder with '_' as its escape character.
    """
    # Default settings.
    e = encoder()
    assert e.back(e.fwd('george')) == 'george'
    assert e.back(e.fwd('hello there')) == 'hello there'
    assert e.back('%sfoo') == ';foo'
    assert e.back('%Sfoo%S%P') == ' foo #'
    assert e.back('%Tfoo') == '%foo'
    assert e.back(e.fwd('%hello')) == '%hello'
    assert e.back(e.fwd(' hello there')) == ' hello there'
    assert e.back(e.fwd(' hello there\t')) == ' hello there\t'
    assert e.back(e.fwd(' hello there\t=')) == ' hello there\t='
    assert e.back(e.fwd(' hello there\t=;#')) == ' hello there\t=;#'
    assert e.back(e.fwd(' hello+_there\t=;#')) == ' hello+_there\t=;#'
    assert e.back(e.fwd('hello+_there\t=;#')) == 'hello+_there\t=;#'
    assert e.fwd('hello there') == 'hello there'

    # Only a, b, c and d pass through unescaped.
    ee = encoder('abcd')
    assert ee.fwd("cab d") == 'cab%Sd'
    assert ee.fwd("e") == '%65'
    assert ee.fwd("aaaa bbbb") == 'aaaa%Sbbbb'

    # Explicit list of characters that must be escaped.
    ee = encoder(notallowed = ']\n\r%')
    assert '\n' not in ee.fwd('hello world\n\r')
    assert ']' not in ee.fwd('hello]% world\n\r')
    assert ee.back(ee.fwd('hello world\n\r'))=='hello world\n\r'

    # A different escape character.
    e = encoder(eschar='_', allowed='0-9a-zA-Z')
    assert e.back('_sfoo') == ';foo'
    assert e.back(e.fwd('%hello')) == '%hello'
    assert e.back(e.fwd('_hello')) == '_hello'
190
191
# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    test()
    # Call form: prints the same text under Python 2 and Python 3.
    print("OK: passed tests")
195