[HTTPDownloader] Refactor HTTPDownloader to Agent

As of twisted 16.7.0, `twisted.web.client.HTTPDownloader` have been
marked as deprecated.
This caused the tests results to show many lines of warnings about it.
This refactor uses `twisted.web.client.Agent`, as suggested by Twisted.
This commit is contained in:
DjLegolas 2018-07-09 01:18:52 +03:00 committed by Calum Lind
commit c7e61f8c34
3 changed files with 141 additions and 117 deletions

View file

@ -15,33 +15,80 @@ import os.path
import zlib import zlib
from twisted.internet import reactor from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure from twisted.python.failure import Failure
from twisted.web import client, http from twisted.web import client, http
from twisted.web.client import URI from twisted.web._newclient import HTTPClientParser
from twisted.web.error import PageRedirect from twisted.web.error import PageRedirect
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent
from zope.interface import implementer
from deluge.common import get_version, utf8_encode_structure from deluge.common import get_version
try:
from urllib.parse import urljoin
except ImportError:
# PY2 fallback
from urlparse import urljoin # pylint: disable=ungrouped-imports
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class HTTPDownloader(client.HTTPDownloader): class CompressionDecoder(client.GzipDecoder):
"""A compression decoder for gzip, x-gzip and deflate"""
def deliverBody(self, protocol): # NOQA: N802
self.original.deliverBody(CompressionDecoderProtocol(protocol, self.original))
class CompressionDecoderProtocol(client._GzipProtocol):
"""A compression decoder protocol for CompressionDecoder"""
def __init__(self, protocol, response):
super(CompressionDecoderProtocol, self).__init__(protocol, response)
self._zlibDecompress = zlib.decompressobj(32 + zlib.MAX_WBITS)
class BodyHandler(HTTPClientParser, object):
"""An HTTP parser that saves the response on a file"""
def __init__(self, request, finished, length, agent):
""" """
Factory class for downloading files and keeping track of progress.
:param request: the request to which this parser is for
:type request: twisted.web.iweb.IClientRequest
:param finished: a Deferred to handle the the finished response
:type finished: twisted.internet.defer.Deferred
:param length: the length of the response
:type length: int
:param agent: the agent from which the request was sent
:type agent: twisted.web.iweb.IAgent
"""
super(BodyHandler, self).__init__(request, finished)
self.agent = agent
self.finished = finished
self.total_length = length
self.current_length = 0
self.data = b''
def dataReceived(self, data): # NOQA: N802
self.current_length += len(data)
self.data += data
if self.agent.part_callback:
self.agent.part_callback(data, self.current_length, self.total_length)
def connectionLost(self, reason): # NOQA: N802
with open(self.agent.filename, 'wb') as _file:
_file.write(self.data)
self.finished.callback(self.agent.filename)
self.state = u'DONE'
HTTPClientParser.connectionLost(self, reason)
@implementer(IAgent)
class HTTPDownloaderAgent(object):
"""
A File Downloader Agent
""" """
def __init__( def __init__(
self, url, filename, part_callback=None, headers=None, self, agent, filename, part_callback=None,
force_filename=False, allow_compression=True, force_filename=False, allow_compression=True, handle_redirect=True,
): ):
""" """
:param url: the url to download from :param agent: the agent which will send the requests
:type url: string :type agent: twisted.web.client.Agent
:param filename: the filename to save the file as :param filename: the filename to save the file as
:type filename: string :type filename: string
:param force_filename: forces use of the supplied filename, regardless of header content :param force_filename: forces use of the supplied filename, regardless of header content
@ -49,46 +96,39 @@ class HTTPDownloader(client.HTTPDownloader):
:param part_callback: a function to be called when a part of data :param part_callback: a function to be called when a part of data
is received, it's signature should be: func(data, current_length, total_length) is received, it's signature should be: func(data, current_length, total_length)
:type part_callback: function :type part_callback: function
:param headers: any optional headers to send
:type headers: dictionary
""" """
self.handle_redirect = handle_redirect
self.agent = agent
self.filename = filename
self.part_callback = part_callback self.part_callback = part_callback
self.current_length = 0
self.total_length = 0
self.decoder = None
self.value = filename
self.force_filename = force_filename self.force_filename = force_filename
self.allow_compression = allow_compression self.allow_compression = allow_compression
self.code = None self.decoder = None
agent = 'Deluge/%s (http://deluge-torrent.org)' % get_version()
client.HTTPDownloader.__init__(
self, url, filename, headers=headers, agent=agent.encode('utf-8'))
def gotHeaders(self, headers): # NOQA: N802 def request_callback(self, response):
self.code = int(self.status) finished = Deferred()
if self.code == http.OK:
if b'content-length' in headers:
self.total_length = int(headers[b'content-length'][0])
else:
self.total_length = 0
encodings_accepted = [b'gzip', b'x-gzip', b'deflate'] if not self.handle_redirect and response.code in (
if ( http.MOVED_PERMANENTLY,
self.allow_compression and b'content-encoding' in headers http.FOUND,
and headers[b'content-encoding'][0] in encodings_accepted http.SEE_OTHER,
http.TEMPORARY_REDIRECT,
): ):
# Adding 32 to the wbits enables gzip & zlib decoding (with automatic header detection) location = response.headers.getRawHeaders(b'location')[0]
# Adding 16 just enables gzip decoding (no zlib) error = PageRedirect(response.code, location=location)
self.decoder = zlib.decompressobj(zlib.MAX_WBITS + 32) finished.errback(Failure(error))
else:
headers = response.headers
body_length = int(headers.getRawHeaders(b'content-length', default=[0])[0])
if b'content-disposition' in headers and not self.force_filename: if headers.hasHeader(b'content-disposition') and not self.force_filename:
content_disp = headers[b'content-disposition'][0].decode('utf-8') content_disp = headers.getRawHeaders(b'content-disposition')[0].decode('utf-8')
content_disp_params = cgi.parse_header(content_disp)[1] content_disp_params = cgi.parse_header(content_disp)[1]
if 'filename' in content_disp_params: if 'filename' in content_disp_params:
new_file_name = content_disp_params['filename'] new_file_name = content_disp_params['filename']
new_file_name = sanitise_filename(new_file_name) new_file_name = sanitise_filename(new_file_name)
new_file_name = os.path.join(os.path.split(self.value)[0], new_file_name) new_file_name = os.path.join(os.path.split(self.filename)[0], new_file_name)
count = 1 count = 1
fileroot = os.path.splitext(new_file_name)[0] fileroot = os.path.splitext(new_file_name)[0]
@ -98,39 +138,39 @@ class HTTPDownloader(client.HTTPDownloader):
new_file_name = '%s-%s%s' % (fileroot, count, fileext) new_file_name = '%s-%s%s' % (fileroot, count, fileext)
count += 1 count += 1
self.fileName = new_file_name self.filename = new_file_name
self.value = new_file_name
elif self.code in ( response.deliverBody(BodyHandler(response.request, finished, body_length, self))
http.MOVED_PERMANENTLY,
http.FOUND,
http.SEE_OTHER,
http.TEMPORARY_REDIRECT,
):
location = headers[b'location'][0]
error = PageRedirect(self.code, location=location)
self.noPage(Failure(error))
return client.HTTPDownloader.gotHeaders(self, headers) return finished
def pagePart(self, data): # NOQA: N802 def request(self, method, uri, headers=None, body_producer=None):
if self.code == http.OK: """
self.current_length += len(data)
if self.decoder:
data = self.decoder.decompress(data)
if self.part_callback:
self.part_callback(data, self.current_length, self.total_length)
return client.HTTPDownloader.pagePart(self, data) :param method: the HTTP method to use
:param uri: the url to download from
:type uri: string
:param headers: any optional headers to send
:type headers: twisted.web.http_headers.Headers
:param body_producer:
:return:
"""
if headers is None:
headers = Headers()
def pageEnd(self): # NOQA: N802 if not headers.hasHeader(b'User-Agent'):
if self.decoder: version = get_version()
data = self.decoder.flush() user_agent = 'Deluge/%s (https://deluge-torrent.org)' % version
self.current_length -= len(data) headers.addRawHeader('User-Agent', user_agent)
self.decoder = None
self.pagePart(data)
return client.HTTPDownloader.pageEnd(self) d = self.agent.request(
method=method,
uri=uri,
headers=headers,
bodyProducer=body_producer,
)
d.addCallback(self.request_callback)
return d
def sanitise_filename(filename): def sanitise_filename(filename):
@ -161,7 +201,10 @@ def sanitise_filename(filename):
return filename return filename
def _download_file(url, filename, callback=None, headers=None, force_filename=False, allow_compression=True): def _download_file(
url, filename, callback=None, headers=None,
force_filename=False, allow_compression=True, handle_redirects=True,
):
""" """
Downloads a file from a specific URL and returns a Deferred. A callback Downloads a file from a specific URL and returns a Deferred. A callback
function can be specified to be called as parts are received. function can be specified to be called as parts are received.
@ -185,42 +228,24 @@ def _download_file(url, filename, callback=None, headers=None, force_filename=Fa
""" """
agent = client.Agent(reactor)
if allow_compression: if allow_compression:
if not headers: enc_accepted = ['gzip', 'x-gzip', 'deflate']
headers = {} decoders = [(enc.encode(), CompressionDecoder) for enc in enc_accepted]
headers['accept-encoding'] = 'deflate, gzip, x-gzip' agent = client.ContentDecoderAgent(agent, decoders)
if handle_redirects:
agent = client.RedirectAgent(agent)
url = url.encode('utf8') agent = HTTPDownloaderAgent(agent, filename, callback, force_filename, allow_compression, handle_redirects)
headers = utf8_encode_structure(headers) if headers else headers
factory = HTTPDownloader(url, filename, callback, headers, force_filename, allow_compression)
uri = URI.fromBytes(url) # The Headers init expects dict values to be a list.
host = uri.host if headers:
port = uri.port for name, value in list(headers.items()):
if not isinstance(value, list):
headers[name] = [value]
if uri.scheme == b'https': return agent.request(b'GET', url.encode(), Headers(headers))
from twisted.internet import ssl
# ClientTLSOptions in Twisted >= 14, see ticket #2765 for details on this addition.
try:
from twisted.internet._sslverify import ClientTLSOptions
except ImportError:
ctx_factory = ssl.ClientContextFactory()
else:
class TLSSNIContextFactory(ssl.ClientContextFactory): # pylint: disable=no-init
"""
A custom context factory to add a server name for TLS connections.
"""
def getContext(self): # NOQA: N802
ctx = ssl.ClientContextFactory.getContext(self)
ClientTLSOptions(host, ctx)
return ctx
ctx_factory = TLSSNIContextFactory()
reactor.connectSSL(host, port, factory, ctx_factory)
else:
reactor.connectTCP(host, port, factory)
return factory.deferred
def download_file( def download_file(
@ -255,16 +280,6 @@ def download_file(
return result return result
def on_download_fail(failure): def on_download_fail(failure):
if failure.check(PageRedirect) and handle_redirects:
new_url = urljoin(url, failure.getErrorMessage().split(' to ')[1])
result = _download_file(
new_url, filename, callback=callback, headers=headers,
force_filename=force_filename,
allow_compression=allow_compression,
)
result.addCallbacks(on_download_success, on_download_fail)
else:
# Log the failure and pass to the caller
log.warning( log.warning(
'Error occurred downloading file from "%s": %s', 'Error occurred downloading file from "%s": %s',
url, failure.getErrorMessage(), url, failure.getErrorMessage(),
@ -275,6 +290,7 @@ def download_file(
d = _download_file( d = _download_file(
url, filename, callback=callback, headers=headers, url, filename, callback=callback, headers=headers,
force_filename=force_filename, allow_compression=allow_compression, force_filename=force_filename, allow_compression=allow_compression,
handle_redirects=handle_redirects,
) )
d.addCallbacks(on_download_success, on_download_fail) d.addCallbacks(on_download_success, on_download_fail)
return d return d

BIN
deluge/tests/data/seo.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

View file

@ -59,6 +59,14 @@ class TrackerIconsTestCase(BaseTestCase):
d.addCallback(self.assertEqual, icon) d.addCallback(self.assertEqual, icon)
return d return d
def test_get_seo_ico_with_sni(self):
# seo using certificates with SNI support only
icon = TrackerIcon(common.get_test_data_file('seo.ico'))
d = self.icons.fetch('www.seo.com')
d.addCallback(self.assertNotIdentical, None)
d.addCallback(self.assertEqual, icon)
return d
def test_get_empty_string_tracker(self): def test_get_empty_string_tracker(self):
d = self.icons.fetch('') d = self.icons.fetch('')
d.addCallback(self.assertIdentical, None) d.addCallback(self.assertIdentical, None)