From 517b2c653b6fed4b4a9ef5a8e866cc7aa4bd771d Mon Sep 17 00:00:00 2001 From: DjLegolas Date: Wed, 5 Jan 2022 02:02:32 +0200 Subject: [PATCH] [TrackerIcon] Fixed parse error on UTF-8 sites with non-english chars When parsing the site's page in search of the FAVICON, the page gets opened. The default file encoding is dependent on the running OS, and might not be `UTF-8` on Windows. Therefore, some trackers might not get their icon downloaded at all because of an error: `UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 2158: character maps to <undefined>`. This fix adds detection of the file encoding using the optional `chardet` dependency, and also a test. Closes: deluge-torrent/deluge#333 Closes: https://dev.deluge-torrent.org/ticket/3479 --- deluge/tests/test_tracker_icons.py | 9 ++++++++- deluge/ui/tracker_icons.py | 13 ++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/deluge/tests/test_tracker_icons.py b/deluge/tests/test_tracker_icons.py index c8e2f32d9..b7158bc1e 100644 --- a/deluge/tests/test_tracker_icons.py +++ b/deluge/tests/test_tracker_icons.py @@ -14,7 +14,6 @@ from . 
import common from .basetest import BaseTestCase common.set_tmp_config_dir() -deluge.ui.tracker_icons.PIL_INSTALLED = False common.disable_new_release_check() @@ -45,6 +44,14 @@ class TrackerIconsTestCase(BaseTestCase): d.addCallback(self.assertEqual, icon) return d + def test_get_google_ico_hebrew(self): + """Test that Google.co.il page is read as UTF-8""" + icon = TrackerIcon(common.get_test_data_file('google.ico')) + d = self.icons.fetch('www.google.co.il') + d.addCallback(self.assertNotIdentical, None) + d.addCallback(self.assertEqual, icon) + return d + def test_get_google_ico_with_redirect(self): # google.com redirects to www.google.com icon = TrackerIcon(common.get_test_data_file('google.ico')) diff --git a/deluge/ui/tracker_icons.py b/deluge/ui/tracker_icons.py index a771a0ecf..813f82df4 100644 --- a/deluge/ui/tracker_icons.py +++ b/deluge/ui/tracker_icons.py @@ -21,6 +21,11 @@ from deluge.configmanager import get_config_dir from deluge.decorators import proxy from deluge.httpdownloader import download_file +try: + import chardet +except ImportError: + chardet = None + try: from PIL import Image except ImportError: @@ -289,7 +294,13 @@ class TrackerIcons(Component): :returns: a Deferred which callbacks a list of available favicons (url, type) :rtype: Deferred """ - with open(page) as _file: + encoding = 'UTF-8' + if chardet: + with open(page, 'rb') as _file: + result = chardet.detect(_file.read()) + encoding = result['encoding'] + + with open(page, encoding=encoding) as _file: parser = FaviconParser() for line in _file: parser.feed(line)