[#1260] Handle redirection better with httpdownloader

This commit is contained in:
bendikro 2015-11-29 15:55:58 +01:00 committed by Calum Lind
commit 02f6bfd578
5 changed files with 134 additions and 96 deletions

View file

@ -15,11 +15,9 @@ import os
import shutil import shutil
import tempfile import tempfile
import threading import threading
from urlparse import urljoin
import twisted.web.client
import twisted.web.error
from twisted.internet import reactor, task from twisted.internet import reactor, task
from twisted.web.client import getPage
import deluge.common import deluge.common
import deluge.component as component import deluge.component as component
@ -282,25 +280,9 @@ class Core(component.Component):
return self.add_torrent_file(filename, base64.encodestring(data), options) return self.add_torrent_file(filename, base64.encodestring(data), options)
def on_download_fail(failure): def on_download_fail(failure):
if failure.check(twisted.web.error.PageRedirect): # Log the error and pass the failure onto the client
new_url = urljoin(url, failure.getErrorMessage().split(" to ")[1]) log.error("Failed to add torrent from url %s", url)
result = download_file( return failure
new_url, tempfile.mkstemp()[1], headers=headers,
force_filename=True
)
result.addCallbacks(on_download_success, on_download_fail)
elif failure.check(twisted.web.client.PartialDownloadError):
result = download_file(
url, tempfile.mkstemp()[1], headers=headers,
force_filename=True, allow_compression=False
)
result.addCallbacks(on_download_success, on_download_fail)
else:
# Log the error and pass the failure onto the client
log.error("Error occurred downloading torrent from %s", url)
log.error("Reason: %s", failure.getErrorMessage())
result = failure
return result
d = download_file( d = download_file(
url, tempfile.mkstemp()[1], headers=headers, force_filename=True url, tempfile.mkstemp()[1], headers=headers, force_filename=True
@ -890,8 +872,6 @@ class Core(component.Component):
:rtype: bool :rtype: bool
""" """
from twisted.web.client import getPage
d = getPage("http://deluge-torrent.org/test_port.php?port=%s" % d = getPage("http://deluge-torrent.org/test_port.php?port=%s" %
self.get_listen_port(), timeout=30) self.get_listen_port(), timeout=30)

View file

@ -10,6 +10,7 @@
import logging import logging
import os.path import os.path
import zlib import zlib
from urlparse import urljoin
from twisted.internet import reactor from twisted.internet import reactor
from twisted.python.failure import Failure from twisted.python.failure import Failure
@ -139,32 +140,28 @@ def sanitise_filename(filename):
return filename return filename
def download_file(url, filename, callback=None, headers=None, force_filename=False, allow_compression=True): def _download_file(url, filename, callback=None, headers=None, force_filename=False, allow_compression=True):
""" """
Downloads a file from a specific URL and returns a Deferred. You can also Downloads a file from a specific URL and returns a Deferred. A callback
specify a callback function to be called as parts are received. function can be specified to be called as parts are received.
:param url: the url to download from Args:
:type url: string url (str): The url to download from
:param filename: the filename to save the file as filename (str): The filename to save the file as
:type filename: string callback (func): A function to be called when a part of data is received,
:param callback: a function to be called when a part of data is received, it's signature should be: func(data, current_length, total_length)
it's signature should be: func(data, current_length, total_length) headers (dict): Any optional headers to send
:type callback: function force_filename (bool): force us to use the filename specified rather than
:param headers: any optional headers to send one the server may suggest
:type headers: dictionary allow_compression (bool): Allows gzip & deflate decoding
:param force_filename: force us to use the filename specified rather than
one the server may suggest
:type force_filename: boolean
:param allow_compression: allows gzip & deflate decoding
:type allow_compression: boolean
:returns: the filename of the downloaded file Returns:
:rtype: Deferred Deferred: the filename of the downloaded file
Raises:
t.w.e.PageRedirect
t.w.e.Error: for all other HTTP response errors
:raises t.w.e.PageRedirect: when server responds with a temporary redirect
or permanently moved.
:raises t.w.e.Error: for all other HTTP response errors (besides OK)
""" """
url = str(url) url = str(url)
filename = str(filename) filename = str(filename)
@ -216,3 +213,52 @@ def download_file(url, filename, callback=None, headers=None, force_filename=Fal
reactor.connectTCP(host, port, factory) reactor.connectTCP(host, port, factory)
return factory.deferred return factory.deferred
def download_file(url, filename, callback=None, headers=None, force_filename=False,
                  allow_compression=True, handle_redirects=True):
    """
    Downloads a file from a specific URL and returns a Deferred. A callback
    function can be specified to be called as parts are received.

    When ``handle_redirects`` is enabled, a PageRedirect failure is handled by
    starting a new download from the redirect target. The number of redirects
    followed for one call is capped so that a redirect loop ends in a failure
    instead of recursing forever.

    Args:
        url (str): The url to download from
        filename (str): The filename to save the file as
        callback (func): A function to be called when a part of data is received,
            its signature should be: func(data, current_length, total_length)
        headers (dict): Any optional headers to send
        force_filename (bool): force us to use the filename specified rather than
            one the server may suggest
        allow_compression (bool): Allows gzip & deflate decoding
        handle_redirects (bool): If HTTP redirects should be handled automatically

    Returns:
        Deferred: the filename of the downloaded file

    Raises:
        t.w.e.PageRedirect: If handle_redirects is False, if the redirect target
            cannot be determined, or if too many redirects were followed
        t.w.e.Error: for all other HTTP response errors

    """
    # Upper bound on the redirects followed for a single call.
    max_redirects = 20
    # Mutable cell shared by the nested functions; the file targets Python 2
    # (it imports ``urlparse``), which has no ``nonlocal``.
    state = {"url": url, "redirects": 0}

    def start_download(target_url):
        return _download_file(target_url, filename, callback=callback, headers=headers,
                              force_filename=force_filename,
                              allow_compression=allow_compression)

    def redirect_location(failure):
        # Prefer the exception's ``location`` attribute when it is available.
        location = getattr(failure.value, "location", None)
        if location:
            return location
        # Fall back to the "<status> <message> to <location>" error message.
        parts = failure.getErrorMessage().split(" to ", 1)
        if len(parts) == 2 and parts[1]:
            return parts[1]
        return None

    def on_download_success(result):
        log.debug("Download success!")
        return result

    def on_download_fail(failure):
        if failure.check(PageRedirect) and handle_redirects:
            location = redirect_location(failure)
            if location and state["redirects"] < max_redirects:
                state["redirects"] += 1
                # Resolve relative targets against the URL that issued this
                # redirect, not against the URL the caller started with.
                state["url"] = urljoin(state["url"], location)
                result = start_download(state["url"])
                result.addCallbacks(on_download_success, on_download_fail)
                return result
        # Log the error and pass the failure to the caller. The URL logged is
        # the one that actually failed (the last redirect hop, if any).
        log.error("Error occurred downloading torrent from '%s': %s",
                  state["url"], failure.getErrorMessage())
        return failure

    d = start_download(url)
    d.addCallbacks(on_download_success, on_download_fail)
    return d

View file

@ -5,8 +5,10 @@ from twisted.internet import reactor
from twisted.internet.error import CannotListenError from twisted.internet.error import CannotListenError
from twisted.python.failure import Failure from twisted.python.failure import Failure
from twisted.trial import unittest from twisted.trial import unittest
from twisted.web.error import PageRedirect
from twisted.web.http import NOT_MODIFIED from twisted.web.http import NOT_MODIFIED
from twisted.web.server import Site from twisted.web.server import Site
from twisted.web.util import redirectTo
from deluge.httpdownloader import download_file from deluge.httpdownloader import download_file
from deluge.log import setup_logger from deluge.log import setup_logger
@ -32,7 +34,8 @@ def fname(name):
class RedirectResource(Resource): class RedirectResource(Resource):
def render(self, request): def render(self, request):
request.redirect("http://localhost:51242/") url = self.get_url()
return redirectTo(url, request)
class RenameResource(Resource): class RenameResource(Resource):
@ -66,6 +69,24 @@ class GzipResource(Resource):
return compress(message, request) return compress(message, request)
class PartialDownloadResource(Resource):
    """
    Test resource whose first response advertises more data than it writes.

    The first render sets a content-length of 5 but writes only the three
    bytes 'abc'; every later render sets a content-length of 3 for the same
    body. NOTE(review): presumably this is meant to provoke a partial
    download error on the first request only - confirm against the tests
    that use the "partial" child.
    """

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but not used.
        # Initialise the base class as TopLevelResource does, rather than
        # leaving the Resource state unset.
        Resource.__init__(self)
        # Number of times render() has been called on this instance.
        self.render_count = 0

    def render(self, request):
        """Write 'abc', overstating the content-length on the first call only."""
        if self.render_count == 0:
            request.setHeader("content-length", "5")
        else:
            request.setHeader("content-length", "3")
        request.write('abc')
        self.render_count += 1
        return ''
class TopLevelResource(Resource): class TopLevelResource(Resource):
addSlash = True addSlash = True
@ -74,8 +95,10 @@ class TopLevelResource(Resource):
Resource.__init__(self) Resource.__init__(self)
self.putChild("cookie", CookieResource()) self.putChild("cookie", CookieResource())
self.putChild("gzip", GzipResource()) self.putChild("gzip", GzipResource())
self.putChild("redirect", RedirectResource()) self.redirect_rsrc = RedirectResource()
self.putChild("redirect", self.redirect_rsrc)
self.putChild("rename", RenameResource()) self.putChild("rename", RenameResource())
self.putChild("partial", PartialDownloadResource())
def getChild(self, path, request): # NOQA def getChild(self, path, request): # NOQA
if path == "": if path == "":
@ -91,13 +114,17 @@ class TopLevelResource(Resource):
class DownloadFileTestCase(unittest.TestCase): class DownloadFileTestCase(unittest.TestCase):
def get_url(self, path=""):
    """Return the URL for ``path`` on the local test web server.

    The port is read from ``self.listen_port``.
    """
    port = self.listen_port
    return "http://localhost:%d/%s" % (port, path)
def setUp(self): # NOQA def setUp(self): # NOQA
setup_logger("warning", fname("log_file")) setup_logger("warning", fname("log_file"))
website = Site(TopLevelResource()) self.website = Site(TopLevelResource())
self.listen_port = 51242 self.listen_port = 51242
self.website.resource.redirect_rsrc.get_url = self.get_url
for dummy in range(10): for dummy in range(10):
try: try:
self.webserver = reactor.listenTCP(self.listen_port, website) self.webserver = reactor.listenTCP(self.listen_port, self.website)
except CannotListenError as ex: except CannotListenError as ex:
error = ex error = ex
self.listen_port += 1 self.listen_port += 1
@ -130,19 +157,19 @@ class DownloadFileTestCase(unittest.TestCase):
return filename return filename
def test_download(self): def test_download(self):
d = download_file("http://localhost:%d/" % self.listen_port, fname("index.html")) d = download_file(self.get_url(), fname("index.html"))
d.addCallback(self.assertEqual, fname("index.html")) d.addCallback(self.assertEqual, fname("index.html"))
return d return d
def test_download_without_required_cookies(self): def test_download_without_required_cookies(self):
url = "http://localhost:%d/cookie" % self.listen_port url = self.get_url("cookie")
d = download_file(url, fname("none")) d = download_file(url, fname("none"))
d.addCallback(self.fail) d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure) d.addErrback(self.assertIsInstance, Failure)
return d return d
def test_download_with_required_cookies(self): def test_download_with_required_cookies(self):
url = "http://localhost:%d/cookie" % self.listen_port url = self.get_url("cookie")
cookie = {"cookie": "password=deluge"} cookie = {"cookie": "password=deluge"}
d = download_file(url, fname("monster"), headers=cookie) d = download_file(url, fname("monster"), headers=cookie)
d.addCallback(self.assertEqual, fname("monster")) d.addCallback(self.assertEqual, fname("monster"))
@ -150,7 +177,7 @@ class DownloadFileTestCase(unittest.TestCase):
return d return d
def test_download_with_rename(self): def test_download_with_rename(self):
url = "http://localhost:%d/rename?filename=renamed" % self.listen_port url = self.get_url("rename?filename=renamed")
d = download_file(url, fname("original")) d = download_file(url, fname("original"))
d.addCallback(self.assertEqual, fname("renamed")) d.addCallback(self.assertEqual, fname("renamed"))
d.addCallback(self.assertContains, "This file should be called renamed") d.addCallback(self.assertContains, "This file should be called renamed")
@ -158,54 +185,64 @@ class DownloadFileTestCase(unittest.TestCase):
def test_download_with_rename_exists(self): def test_download_with_rename_exists(self):
open(fname('renamed'), 'w').close() open(fname('renamed'), 'w').close()
url = "http://localhost:%d/rename?filename=renamed" % self.listen_port url = self.get_url("rename?filename=renamed")
d = download_file(url, fname("original")) d = download_file(url, fname("original"))
d.addCallback(self.assertEqual, fname("renamed-1")) d.addCallback(self.assertEqual, fname("renamed-1"))
d.addCallback(self.assertContains, "This file should be called renamed") d.addCallback(self.assertContains, "This file should be called renamed")
return d return d
def test_download_with_rename_sanitised(self): def test_download_with_rename_sanitised(self):
url = "http://localhost:%d/rename?filename=/etc/passwd" % self.listen_port url = self.get_url("rename?filename=/etc/passwd")
d = download_file(url, fname("original")) d = download_file(url, fname("original"))
d.addCallback(self.assertEqual, fname("passwd")) d.addCallback(self.assertEqual, fname("passwd"))
d.addCallback(self.assertContains, "This file should be called /etc/passwd") d.addCallback(self.assertContains, "This file should be called /etc/passwd")
return d return d
def test_download_with_rename_prevented(self): def test_download_with_rename_prevented(self):
url = "http://localhost:%d/rename?filename=spam" % self.listen_port url = self.get_url("rename?filename=spam")
d = download_file(url, fname("forced"), force_filename=True) d = download_file(url, fname("forced"), force_filename=True)
d.addCallback(self.assertEqual, fname("forced")) d.addCallback(self.assertEqual, fname("forced"))
d.addCallback(self.assertContains, "This file should be called spam") d.addCallback(self.assertContains, "This file should be called spam")
return d return d
def test_download_with_gzip_encoding(self): def test_download_with_gzip_encoding(self):
url = "http://localhost:%d/gzip?msg=success" % self.listen_port url = self.get_url("gzip?msg=success")
d = download_file(url, fname("gzip_encoded")) d = download_file(url, fname("gzip_encoded"))
d.addCallback(self.assertContains, "success") d.addCallback(self.assertContains, "success")
return d return d
def test_download_with_gzip_encoding_disabled(self): def test_download_with_gzip_encoding_disabled(self):
url = "http://localhost:%d/gzip?msg=fail" % self.listen_port url = self.get_url("gzip?msg=fail")
d = download_file(url, fname("gzip_encoded"), allow_compression=False) d = download_file(url, fname("gzip_encoded"), allow_compression=False)
d.addCallback(self.failIfContains, "fail") d.addCallback(self.failIfContains, "fail")
return d return d
def test_page_redirect(self): def test_page_redirect_unhandled(self):
url = 'http://localhost:%d/redirect' % self.listen_port url = self.get_url("redirect")
d = download_file(url, fname("none")) d = download_file(url, fname("none"))
d.addCallback(self.fail) d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure)
def on_redirect(failure):
self.assertTrue(type(failure), PageRedirect)
d.addErrback(on_redirect)
return d
def test_page_redirect(self):
url = self.get_url("redirect")
d = download_file(url, fname("none"), handle_redirects=True)
d.addCallback(self.assertEqual, fname("none"))
d.addErrback(self.fail)
return d return d
def test_page_not_found(self): def test_page_not_found(self):
d = download_file("http://localhost:%d/page/not/found" % self.listen_port, fname("none")) d = download_file(self.get_url("page/not/found"), fname("none"))
d.addCallback(self.fail) d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure) d.addErrback(self.assertIsInstance, Failure)
return d return d
def test_page_not_modified(self): def test_page_not_modified(self):
headers = {'If-Modified-Since': formatdate(usegmt=True)} headers = {'If-Modified-Since': formatdate(usegmt=True)}
d = download_file("http://localhost:%d/" % self.listen_port, fname("index.html"), headers=headers) d = download_file(self.get_url(), fname("index.html"), headers=headers)
d.addCallback(self.fail) d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure) d.addErrback(self.assertIsInstance, Failure)
return d return d

View file

@ -11,13 +11,10 @@ import base64
import cgi import cgi
import logging import logging
import os import os
from urlparse import urljoin
import gobject import gobject
import gtk import gtk
import pygtk import pygtk
import twisted.web.client
import twisted.web.error
import deluge.common import deluge.common
import deluge.component as component import deluge.component as component
@ -641,25 +638,16 @@ class AddTorrentDialog(component.Component):
pb.set_text("%s" % deluge.common.fsize(current_length)) pb.set_text("%s" % deluge.common.fsize(current_length))
def on_download_success(result): def on_download_success(result):
log.debug("Download success!")
self.add_from_files([result]) self.add_from_files([result])
dialog.destroy() dialog.destroy()
def on_download_fail(result): def on_download_fail(result):
if result.check(twisted.web.error.PageRedirect): log.debug("Download failed: %s", result)
new_url = urljoin(url, result.getErrorMessage().split(" to ")[1]) dialog.destroy()
result = download_file(new_url, tmp_file, on_part) ErrorDialog(
result.addCallbacks(on_download_success, on_download_fail) _("Download Failed"), "%s %s" % (_("Failed to download:"), url),
elif result.check(twisted.web.client.PartialDownloadError): details=result.getErrorMessage(), parent=self.dialog
result = download_file(url, tmp_file, on_part, allow_compression=False) ).run()
result.addCallbacks(on_download_success, on_download_fail)
else:
log.debug("Download failed: %s", result)
dialog.destroy()
ErrorDialog(
_("Download Failed"), "%s %s" % (_("Failed to download:"), url),
details=result.getErrorMessage(), parent=self.dialog
).run()
return result return result
d = download_file(url, tmp_file, on_part) d = download_file(url, tmp_file, on_part)

View file

@ -17,10 +17,7 @@ import tempfile
import time import time
from types import FunctionType from types import FunctionType
from urllib import unquote_plus from urllib import unquote_plus
from urlparse import urljoin
import twisted.web.client
import twisted.web.error
from twisted.internet import reactor from twisted.internet import reactor
from twisted.internet.defer import Deferred, DeferredList from twisted.internet.defer import Deferred, DeferredList
from twisted.web import http, resource, server from twisted.web import http, resource, server
@ -628,17 +625,7 @@ class WebApi(JSONComponent):
return result return result
def on_download_fail(result): def on_download_fail(result):
if result.check(twisted.web.error.PageRedirect): log.error("Failed to add torrent from url %s", url)
new_url = urljoin(url, result.getErrorMessage().split(" to ")[1])
result = httpdownloader.download_file(new_url, tmp_file, headers=headers)
result.addCallbacks(on_download_success, on_download_fail)
elif result.check(twisted.web.client.PartialDownloadError):
result = httpdownloader.download_file(url, tmp_file, headers=headers,
allow_compression=False)
result.addCallbacks(on_download_success, on_download_fail)
else:
log.error("Error occurred downloading torrent from %s", url)
log.error("Reason: %s", result.getErrorMessage())
return result return result
tempdir = tempfile.mkdtemp(prefix="delugeweb-") tempdir = tempfile.mkdtemp(prefix="delugeweb-")