1
2
"""Functions to make unicode handling easier for Python 2.X.
"""
5
6 import sys
7
def u(s, encoding='utf-8'):
    """Convert string-like objects to unicode.

    The general idea is that, as soon as data comes into your program,
    you call u() on it to make sure it is unicode.

    Conversion is attempted in this order:

      1. ``unicode(s, encoding)``.
      2. If that raises UnicodeDecodeError, ``s`` is decoded with the
         'unicode_escape' codec instead.
      3. If that raises a TypeError whose message contains 'need string'
         (``s`` is not a string or buffer), ``str(s)`` is decoded with
         'unicode_escape'.

    Any other TypeError leaves ``s`` untouched.
    NOTE(review): presumably that is the error Python 2 raises when ``s``
    is already unicode, which is what keeps u() idempotent -- confirm.

    @param s: the object to convert.
    @param encoding: codec tried first (default 'utf-8').
    @return: the converted value, or ``s`` itself when no conversion applied.
    @note: u() should be idempotent: i.e. u(u(x)) == u(x)
    """
    try:
        s = unicode(s, encoding)
    except UnicodeDecodeError:
        # The bytes are not valid in `encoding`; fall back to a codec that
        # accepts every byte value.
        s = unicode(s, 'unicode_escape')
    except TypeError as err:
        if 'need string' in err.args[0]:
            # Convert the *object* (not the exception) and keep the result
            # in `s` so that it is what gets returned.
            s = unicode(str(s), 'unicode_escape')
    return s
32
33
# Ad-hoc checks that run at module level: u() is fed byte strings that each
# contain one non-ASCII byte, and the results are printed.
print(u('xx\x42\xabooo'))
print(u('x\x42x\xabooo'))
print(u('x\x42x\xcfooo'))
print(u('x\x42x\xffooo'))
print(u('boo\x40\xaboo'))

# u() must be idempotent: converting twice equals converting once.
assert u(u('boo\x40\xaboo')) == u('boo\x40\xaboo')
assert u(u('x\x42x\xffooo')) == u('x\x42x\xffooo')
42
43
# Fixture for checking u() on an object that is not a string: its repr is a
# byte string containing a non-ASCII byte.
class _test(object):

    def __repr__(self):
        return 'boo\x40\xaboo'


print(u(_test()))
48
def e(s):
    """Encode unicode into a bytestring, for printing.

    The idea is that, just before you print anything,
    you call e() on the data to be printed.

    @param s: the text about to be printed.
    @return: ``s`` encoded as UTF-8 when the interpreter's default encoding
        is 'ascii'; otherwise ``s`` unchanged.
    """
    if sys.getdefaultencoding() == 'ascii':
        return s.encode('utf-8')
    return s
56