From a7d50f5f4a9f69e33075fbe05d4ae42158d1b9c9 Mon Sep 17 00:00:00 2001
From: Nils Philippsen
Date: Wed, 26 Oct 2011 17:31:36 +0200
Subject: [PATCH 1/2] python: implement and use utf8 stream writer for stdout, stderr

The C-side (glib) really wants stuff to be encoded in UTF-8.
(cherry picked from commit b5a5011f31d6062cd00ee6b02ddf356d691e67e6)
---
 lib/python/packagekit/backend.py |   45 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/lib/python/packagekit/backend.py b/lib/python/packagekit/backend.py
index ae7bbad..4c7fd2c 100644
--- a/lib/python/packagekit/backend.py
+++ b/lib/python/packagekit/backend.py
@@ -37,8 +37,49 @@ def _to_unicode(txt, encoding='utf-8'):
             txt = unicode(txt, encoding, errors='replace')
     return txt
 
+def _to_utf8(txt, errors='replace'):
+    '''convert practically anything to a utf-8-encoded byte string'''
+
+    # convert to unicode object
+    if isinstance(txt, str):
+        txt = txt.decode('utf-8', errors=errors)
+    if not isinstance(txt, basestring):
+        # try to convert non-string objects like exceptions
+        try:
+            # if txt.__unicode__() exists, or txt.__str__() returns ASCII
+            txt = unicode(txt)
+        except UnicodeDecodeError:
+            # if txt.__str__() exists
+            txt = str(txt).decode('utf-8', errors=errors)
+        except:
+            # no __str__(), __unicode__() methods, use representation
+            txt = unicode(repr(txt))
+
+    # return encoded as UTF-8
+    return txt.encode('utf-8', errors=errors)
+
 # Classes
 
+class _UTF8Writer(codecs.StreamWriter):
+
+    encoding = 'utf-8'
+
+    def __init__(self, stream, errors='replace'):
+        codecs.StreamWriter.__init__(self, stream, errors)
+
+    def encode(self, inp, errors='strict'):
+        try:
+            l = len(inp)
+        except TypeError:
+            try:
+                l = len(unicode(inp))
+            except:
+                try:
+                    l = len(str(inp))
+                except:
+                    l = 1
+        return (_to_utf8(inp, errors=errors), l)
+
 class PkError(Exception):
     def __init__(self, code, details):
         self.code = code
@@ -49,6 +90,10 @@ class PkError(Exception):
 class PackageKitBaseBackend:
 
     def __init__(self, cmds):
+        # Make sys.stdout/stderr cope with UTF-8
+        sys.stdout = _UTF8Writer(sys.stdout)
+        sys.stderr = _UTF8Writer(sys.stderr)
+
         # Setup a custom exception handler
         installExceptionHandler(self)
         self.cmds = cmds
-- 
1.7.6.4

From 01226fbdfc6cd9f03183048c7531ed324d4b5412 Mon Sep 17 00:00:00 2001
From: Nils Philippsen
Date: Wed, 26 Oct 2011 17:33:18 +0200
Subject: [PATCH 2/2] yum: don't let yum.misc.setup_locale() override stdout codec

Use our own stdout/stderr wrappers, as what codecs.getwriter() supplies only
works for unicode, but not for already encoded data.
(cherry picked from commit 31b9a6942a65f8eeeb67c6ea376429e8c5eac43f)
---
 backends/yum/yumBackend.py |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/backends/yum/yumBackend.py b/backends/yum/yumBackend.py
index 479ee2d..dfed1a2 100755
--- a/backends/yum/yumBackend.py
+++ b/backends/yum/yumBackend.py
@@ -3482,7 +3482,7 @@ class PackageKitYumBase(yum.YumBase):
                 raise PkError(ERROR_FAILED_CONFIG_PARSING, _to_unicode(e))
 
         # setup to use LANG for descriptions
-        yum.misc.setup_locale(override_time=True)
+        yum.misc.setup_locale(override_time=True, override_codecs=False)
 
         self.missingGPGKey = None
         self.dsCallback = DepSolveCallback(backend)
-- 
1.7.6.4