| 1 |
import types |
|---|
| 2 |
import urllib |
|---|
| 3 |
import datetime |
|---|
| 4 |
from django.utils.functional import Promise |
|---|
| 5 |
|
|---|
| 6 |
class DjangoUnicodeDecodeError(UnicodeDecodeError):
    """
    A UnicodeDecodeError that additionally remembers the object that was
    being decoded when the failure happened.
    """
    def __init__(self, obj, *args):
        # Keep the offending object around so __str__ can report it; the
        # remaining arguments are the standard UnicodeDecodeError ones.
        self.obj = obj
        UnicodeDecodeError.__init__(self, *args)

    def __str__(self):
        """
        Returns the standard error text followed by the repr and the type
        of the object that could not be decoded.
        """
        culprit = self.obj
        details = (UnicodeDecodeError.__str__(self), culprit, type(culprit))
        return '%s. You passed in %r (%s)' % details
|---|
| 15 |
|
|---|
| 16 |
class StrAndUnicode(object):
    """
    Mix-in that derives __str__ from __unicode__.

    Subclasses supply __unicode__; __str__ then returns that text encoded
    as a UTF-8 bytestring.
    """
    def __str__(self):
        text = self.__unicode__()
        return text.encode('utf-8')
|---|
| 24 |
|
|---|
| 25 |
def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Returns a unicode object representing 's', decoding bytestrings with the
    'encoding' codec. Promise instances are handed back untouched.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if not isinstance(s, Promise):
        return force_unicode(s, encoding, strings_only, errors)
    # The input is the result of a gettext_lazy() call; keep it lazy.
    return s
|---|
| 36 |
|
|---|
| 37 |
def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): |
|---|
| 38 |
""" |
|---|
| 39 |
Similar to smart_unicode, except that lazy instances are resolved to |
|---|
| 40 |
strings, rather than kept as lazy objects. |
|---|
| 41 |
|
|---|
| 42 |
If strings_only is True, don't convert (some) non-string-like objects. |
|---|
| 43 |
""" |
|---|
| 44 |
if strings_only and isinstance(s, (types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float)): |
|---|
| 45 |
return s |
|---|
| 46 |
try: |
|---|
| 47 |
if not isinstance(s, basestring,): |
|---|
| 48 |
if hasattr(s, '__unicode__'): |
|---|
| 49 |
s = unicode(s) |
|---|
| 50 |
else: |
|---|
| 51 |
try: |
|---|
| 52 |
s = unicode(str(s), encoding, errors) |
|---|
| 53 |
except UnicodeEncodeError: |
|---|
| 54 |
if not isinstance(s, Exception): |
|---|
| 55 |
raise |
|---|
| 56 |
# If we get to here, the caller has passed in an Exception |
|---|
| 57 |
# subclass populated with non-ASCII data without special |
|---|
| 58 |
# handling to display as a string. We need to handle this |
|---|
| 59 |
# without raising a further exception. We do an |
|---|
| 60 |
# approximation to what the Exception's standard str() |
|---|
| 61 |
# output should be. |
|---|
| 62 |
s = ' '.join([force_unicode(arg, encoding, strings_only, |
|---|
| 63 |
errors) for arg in s]) |
|---|
| 64 |
elif not isinstance(s, unicode): |
|---|
| 65 |
# Note: We use .decode() here, instead of unicode(s, encoding, |
|---|
| 66 |
# errors), so that if s is a SafeString, it ends up being a |
|---|
| 67 |
# SafeUnicode at the end. |
|---|
| 68 |
s = s.decode(encoding, errors) |
|---|
| 69 |
except UnicodeDecodeError, e: |
|---|
| 70 |
raise DjangoUnicodeDecodeError(s, *e.args) |
|---|
| 71 |
return s |
|---|
| 72 |
|
|---|
| 73 |
def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Returns a bytestring version of 's', encoded as specified in 'encoding'.
    Bytestring input is treated as UTF-8 and is only re-encoded when a
    different target encoding is requested.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if strings_only and isinstance(s, (types.NoneType, int)):
        return s
    if isinstance(s, Promise):
        # Resolve the lazy object to unicode first, then encode.
        return unicode(s).encode(encoding, errors)
    if isinstance(s, unicode):
        return s.encode(encoding, errors)
    if isinstance(s, basestring):
        # A bytestring: transcode from UTF-8 only when it is non-empty and
        # the target encoding differs.
        if s and encoding != 'utf-8':
            return s.decode('utf-8', errors).encode(encoding, errors)
        return s
    try:
        return str(s)
    except UnicodeEncodeError:
        if not isinstance(s, Exception):
            return unicode(s).encode(encoding, errors)
        # An Exception subclass containing non-ASCII data that doesn't
        # know how to print itself properly. We shouldn't raise a
        # further exception, so convert its arguments one by one.
        parts = [smart_str(arg, encoding, strings_only, errors)
                 for arg in s]
        return ' '.join(parts)
|---|
| 100 |
|
|---|
| 101 |
def iri_to_uri(iri):
    """
    Convert an Internationalized Resource Identifier (IRI) portion to a URI
    portion that is suitable for inclusion in a URL.

    This is the algorithm from section 3.1 of RFC 3987. However, since we are
    assuming input is either UTF-8 or unicode already, we can simplify things a
    little from the full method.

    Returns an ASCII string containing the encoded result, or None when
    'iri' is None.
    """
    # The list of safe characters here is constructed from the printable ASCII
    # characters that are not explicitly excluded by the list at the end of
    # section 3.1 of RFC 3987. That includes "@" (a gen-delim), "'" (a
    # sub-delim) and "~" (an unreserved character), none of which should be
    # percent-encoded; urllib.quote() would escape them unless they are
    # listed here.
    if iri is None:
        return iri
    return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")
|---|