Package gmisclib :: Module g_ucode
[frames] | no frames]

Source Code for Module gmisclib.g_ucode

 1  # -*- coding: utf-8 -*- 
 2   
 3  """Functions to make unicode handling easier for Python 2.X. 
 4  """ 
 5   
 6  import sys 
 7   
def u(s, encoding='utf-8'):
    """Convert string-like objects to unicode.

    The general idea is that, as soon as data comes into your program,
    you call u() on it to make sure it is unicode.

    @param s: the data to convert: a byte string, a unicode string, or any
        other object (which is converted through C{str()}).
    @param encoding: the encoding tried first when decoding a byte string.
    @return: the unicode form of C{s}.
    @raise UnicodeDecodeError: if a byte string is valid neither in
        C{encoding} nor as 'unicode_escape' data (e.g. a truncated
        backslash escape).
    @note: u() should be idempotent: i.e. u(u(x)) == u(x)
    """
    try:
        text_type = unicode
    except NameError:
        # No separate unicode type (Python 3): str is the text type.
        text_type = str

    if isinstance(s, text_type):
        # Already unicode; returning it untouched keeps u() idempotent.
        return s
    try:
        return text_type(s, encoding)
    except UnicodeDecodeError:
        # The bytes are not valid in the requested encoding.  The
        # 'unicode_escape' codec accepts every byte value, so use it
        # as the fallback.
        return text_type(s, 'unicode_escape')
    except TypeError:
        # s is not a byte string or buffer: go through its str() form.
        # (The result must be stored and returned; binding it to the
        # caught exception would hand the object back unconverted.)
        text = str(s)
        if isinstance(text, text_type):
            return text
        return text_type(text, 'unicode_escape')
32 33
def test():
    """Smoke-test u().

    Prints the conversion of several byte strings that contain
    non-ASCII bytes, asserts that u() is idempotent on two of them,
    and finally prints the conversion of an object that only defines
    C{__repr__}.
    """
    samples = (
        'xx\x42\xabooo',
        'x\x42x\xabooo',
        'x\x42x\xcfooo',
        'x\x42x\xffooo',
        'boo\x40\xaboo',
    )
    for sample in samples:
        print(u(sample))

    # Converting an already-converted value must not change it.
    for sample in ('boo\x40\xaboo', 'x\x42x\xffooo'):
        assert u(sample) == u(u(sample))

    class _ReprOnly(object):
        # Not a string at all: its only text form comes from __repr__.
        def __repr__(self):
            return 'boo\x40\xaboo'

    print(u(_ReprOnly()))
def e(s):
    """Encode unicode into a bytestring, for printing.

    The idea is that, just before you print anything,
    you call e() on the data to be printed.

    @param s: the text to be printed.
    @return: C{s} encoded as UTF-8 when the interpreter's default
        encoding is ASCII and C{s} is not already a C{str};
        otherwise C{s} unchanged.
    """
    # Where the default encoding is ASCII (Python 2), str is the byte
    # string type.  Calling .encode() on one would first decode it
    # implicitly as ASCII and raise UnicodeDecodeError on any non-ASCII
    # byte, so data that is already a str is passed through untouched.
    if sys.getdefaultencoding() == 'ascii' and not isinstance(s, str):
        return s.encode('utf-8')
    return s
56