mirror of
https://git.deluge-torrent.org/deluge
synced 2025-04-21 03:54:50 +00:00
Changed decode_string to always return unicode.
This commit is contained in:
parent
ffb902ba06
commit
60f196ff93
2 changed files with 29 additions and 14 deletions
|
@ -608,22 +608,34 @@ def xml_encode(string):
|
||||||
|
|
||||||
def decode_string(s, encoding="utf8"):
    """
    Decodes a byte string and returns it as unicode.

    The following are tried in order and the first clean decode wins:
    `:param:encoding`, utf8, iso-8859-1, then the encoding detected by
    chardet (strictly at first, finally ignoring undecodable bytes).

    :param s: string to decode
    :type s: string
    :keyword encoding: the encoding to use in the decoding
    :type encoding: string
    :returns: s converted to unicode, or u'' if s is empty or every
        attempt failed
    :rtype: unicode

    """
    if not s:
        return u''
    elif isinstance(s, unicode):
        return s

    # An unknown codec name raises LookupError and a non-string codec name
    # (e.g. None) raises TypeError; neither is a UnicodeDecodeError, so they
    # must be caught explicitly or a bad `encoding` value aborts the whole
    # fallback chain.
    recoverable = (UnicodeDecodeError, LookupError, TypeError)

    for candidate in (encoding, "utf8", "iso-8859-1"):
        try:
            return s.decode(candidate, 'strict')
        except recoverable:
            pass

    # NOTE(review): iso-8859-1 maps every byte value, so for byte strings the
    # loop above looks like it always returns and the detection below is
    # unreachable. The original order is kept; confirm the intent.
    #
    # Detection is only run (once) after every fixed candidate has failed,
    # instead of eagerly on every call.
    detected = chardet.detect(s)["encoding"]
    for errors in ('strict', 'ignore'):
        try:
            return s.decode(detected, errors)
        except recoverable:
            pass
    return u''
|
||||||
|
|
||||||
def utf8_encoded(s, encoding="utf8"):
    """
    Returns a utf8 encoded byte string of s.

    A byte string is first decoded with decode_string (trying
    `:param:encoding` first) and then re-encoded as utf8; a unicode string
    is encoded directly. Any other type is returned unchanged.

    :param s: (unicode) string to (re-)encode
    :type s: basestring
    :keyword encoding: the encoding to try first when decoding a byte string
    :type encoding: string
    :returns: a utf8 encoded string of s
    :rtype: str

    """
    if isinstance(s, str):
        try:
            # Callers pass the torrent's declared encoding as a second
            # argument, so it is forwarded to decode_string.
            s = decode_string(s, encoding).encode("utf8")
        except UnicodeEncodeError:
            # Best effort: on failure s is returned as it was passed in.
            log.warn("Error when encoding to utf8: %s" % s)
    elif isinstance(s, unicode):
        s = s.encode("utf8", "ignore")
    return s
|
||||||
|
|
|
@ -51,7 +51,7 @@ except ImportError:
|
||||||
from sha import sha
|
from sha import sha
|
||||||
|
|
||||||
from deluge import bencode
|
from deluge import bencode
|
||||||
from deluge.common import decode_string, path_join
|
from deluge.common import utf8_encoded, path_join
|
||||||
import deluge.configmanager
|
import deluge.configmanager
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
@ -88,9 +88,9 @@ class TorrentInfo(object):
|
||||||
# Check if 'name.utf-8' is in the torrent and if not try to decode the string
|
# Check if 'name.utf-8' is in the torrent and if not try to decode the string
|
||||||
# using the encoding found.
|
# using the encoding found.
|
||||||
if "name.utf-8" in self.__m_metadata["info"]:
|
if "name.utf-8" in self.__m_metadata["info"]:
|
||||||
self.__m_name = decode_string(self.__m_metadata["info"]["name.utf-8"])
|
self.__m_name = utf8_encoded(self.__m_metadata["info"]["name.utf-8"])
|
||||||
else:
|
else:
|
||||||
self.__m_name = decode_string(self.__m_metadata["info"]["name"], self.encoding)
|
self.__m_name = utf8_encoded(self.__m_metadata["info"]["name"], self.encoding)
|
||||||
|
|
||||||
# Get list of files from torrent info
|
# Get list of files from torrent info
|
||||||
paths = {}
|
paths = {}
|
||||||
|
@ -104,7 +104,7 @@ class TorrentInfo(object):
|
||||||
if "path.utf-8" in f:
|
if "path.utf-8" in f:
|
||||||
path = os.path.join(prefix, *f["path.utf-8"])
|
path = os.path.join(prefix, *f["path.utf-8"])
|
||||||
else:
|
else:
|
||||||
path = decode_string(os.path.join(prefix, decode_string(os.path.join(*f["path"]), self.encoding)), self.encoding)
|
path = utf8_encoded(os.path.join(prefix, utf8_encoded(os.path.join(*f["path"]), self.encoding)), self.encoding)
|
||||||
f["index"] = index
|
f["index"] = index
|
||||||
paths[path] = f
|
paths[path] = f
|
||||||
|
|
||||||
|
@ -160,7 +160,7 @@ class TorrentInfo(object):
|
||||||
if "path.utf-8" in f:
|
if "path.utf-8" in f:
|
||||||
path = os.path.join(prefix, *f["path.utf-8"])
|
path = os.path.join(prefix, *f["path.utf-8"])
|
||||||
else:
|
else:
|
||||||
path = decode_string(os.path.join(prefix, decode_string(os.path.join(*f["path"]), self.encoding)), self.encoding)
|
path = utf8_encoded(os.path.join(prefix, utf8_encoded(os.path.join(*f["path"]), self.encoding)), self.encoding)
|
||||||
self.__m_files.append({
|
self.__m_files.append({
|
||||||
'path': path,
|
'path': path,
|
||||||
'size': f["length"],
|
'size': f["length"],
|
||||||
|
|
Loading…
Add table
Reference in a new issue