[Core] Support creating v2 torrents

Add support for v2 torrents in create_torrent, but keep the old
default of only adding the v1 metadata.

Unify the single-file and directory cases to avoid code
duplication.

V2 torrents require files to be piece-aligned, and so do hybrid
v1/v2 ones. To handle both piece-aligned and non-aligned files,
always read the files in piece-aligned chunks, and re-slice the
buffer when needed (for v1-only multi-file torrents).

Also, the progress event had to be adapted: it now depends on the
number of bytes hashed rather than the number of pieces. To
avoid sending an excessive number of events when handling a
directory with many small files, add a mechanism that limits
events to at most one per piece_length bytes hashed.

Closes: https://github.com/deluge-torrent/deluge/pull/430
This commit is contained in:
Radu Carpa 2023-08-03 09:15:23 +02:00 committed by Calum Lind
commit 1751d62df9
No known key found for this signature in database
GPG key ID: 90597A687B836BA3
3 changed files with 328 additions and 87 deletions

View file

@ -21,7 +21,7 @@ from twisted.web.client import Agent, readBody
import deluge.common import deluge.common
import deluge.component as component import deluge.component as component
from deluge import path_chooser_common from deluge import metafile, path_chooser_common
from deluge._libtorrent import LT_VERSION, lt from deluge._libtorrent import LT_VERSION, lt
from deluge.configmanager import ConfigManager, get_config_dir from deluge.configmanager import ConfigManager, get_config_dir
from deluge.core.alertmanager import AlertManager from deluge.core.alertmanager import AlertManager
@ -998,7 +998,11 @@ class Core(component.Component):
created_by=None, created_by=None,
trackers=None, trackers=None,
add_to_session=False, add_to_session=False,
torrent_format=metafile.TorrentFormat.V1,
): ):
if isinstance(torrent_format, str):
torrent_format = metafile.TorrentFormat(torrent_format)
log.debug('creating torrent..') log.debug('creating torrent..')
return threads.deferToThread( return threads.deferToThread(
self._create_torrent_thread, self._create_torrent_thread,
@ -1012,6 +1016,7 @@ class Core(component.Component):
created_by=created_by, created_by=created_by,
trackers=trackers, trackers=trackers,
add_to_session=add_to_session, add_to_session=add_to_session,
torrent_format=torrent_format,
) )
def _create_torrent_thread( def _create_torrent_thread(
@ -1026,6 +1031,7 @@ class Core(component.Component):
created_by, created_by,
trackers, trackers,
add_to_session, add_to_session,
torrent_format,
): ):
from deluge import metafile from deluge import metafile
@ -1038,6 +1044,7 @@ class Core(component.Component):
private=private, private=private,
created_by=created_by, created_by=created_by,
trackers=trackers, trackers=trackers,
torrent_format=torrent_format,
) )
write_file = False write_file = False

View file

@ -10,10 +10,13 @@
# See LICENSE for more details. # See LICENSE for more details.
# #
import copy
import logging import logging
import os.path import os.path
import time import time
from enum import Enum
from hashlib import sha1 as sha from hashlib import sha1 as sha
from hashlib import sha256
import deluge.component as component import deluge.component as component
from deluge.bencode import bencode from deluge.bencode import bencode
@ -41,6 +44,35 @@ def dummy(*v):
pass pass
class TorrentFormat(str, Enum):
    """Which metadata flavour(s) a created torrent should contain.

    Members are also ``str`` instances, so they compare equal to their plain
    string value. Lookup by value is case-insensitive (see ``_missing_``).
    """

    V1 = 'v1'
    V2 = 'v2'
    HYBRID = 'hybrid'

    @classmethod
    def _missing_(cls, value):
        """Resolve a value that is not an exact member value.

        Strings are matched case-insensitively. Anything else (empty values,
        non-strings, unknown names) returns None, which makes ``Enum`` raise
        its regular ``ValueError`` instead of leaking an ``AttributeError``
        from calling ``.lower()`` on a non-string.
        """
        if not isinstance(value, str) or not value:
            return None
        lowered = value.lower()
        for member in cls:
            if member.value == lowered:
                return member
        return None

    def to_lt_flag(self):
        """Return the integer passed as ``flags`` to ``lt.create_torrent``.

        NOTE(review): 64 and 32 presumably mirror libtorrent's ``v1_only`` and
        ``v2_only`` create_torrent flags; confirm against the libtorrent
        version in use. Hybrid uses 0 (no restriction flag).
        """
        if self.value == 'v1':
            return 64
        if self.value == 'v2':
            return 32
        return 0

    def includes_v1(self):
        """Return True when v1 metadata must be generated (V1 or HYBRID)."""
        return self == self.__class__.V1 or self == self.__class__.HYBRID

    def includes_v2(self):
        """Return True when v2 metadata must be generated (V2 or HYBRID)."""
        return self == self.__class__.V2 or self == self.__class__.HYBRID
class RemoteFileProgress: class RemoteFileProgress:
def __init__(self, session_id): def __init__(self, session_id):
self.session_id = session_id self.session_id = session_id
@ -65,6 +97,7 @@ def make_meta_file_content(
private=False, private=False,
created_by=None, created_by=None,
trackers=None, trackers=None,
torrent_format=TorrentFormat.V1,
): ):
data = {'creation date': int(gmtime())} data = {'creation date': int(gmtime())}
if url: if url:
@ -80,10 +113,20 @@ def make_meta_file_content(
if session_id: if session_id:
progress = RemoteFileProgress(session_id) progress = RemoteFileProgress(session_id)
info = makeinfo(path, piece_length, progress, name, content_type, private) info, piece_layers = makeinfo(
path,
piece_length,
progress,
name,
content_type,
private,
torrent_format,
)
# check_info(info) # check_info(info)
data['info'] = info data['info'] = info
if piece_layers is not None:
data['piece layers'] = piece_layers
if title: if title:
data['title'] = title.encode('utf8') data['title'] = title.encode('utf8')
if comment: if comment:
@ -170,101 +213,237 @@ def calcsize(path):
return total return total
def makeinfo(path, piece_length, progress, name=None, content_type=None, private=False): def _next_pow2(num):
# HEREDAVE. If path is directory, how do we assign content type? import math
path = os.path.abspath(path)
piece_count = 0
if os.path.isdir(path):
subs = sorted(subfiles(path))
pieces = []
sh = sha()
done = 0
fs = []
totalsize = 0.0
totalhashed = 0
for p, f in subs:
totalsize += os.path.getsize(f)
if totalsize >= piece_length:
import math
num_pieces = math.ceil(totalsize / piece_length) if not num:
else: return 1
num_pieces = 1 return 2 ** math.ceil(math.log2(num))
for p, f in subs:
pos = 0 def _sha256_merkle_root(leafs, nb_leafs, padding, in_place=True) -> bytes:
size = os.path.getsize(f) """
p2 = [n.encode('utf8') for n in p] Build the root of the merkle hash tree from the (possibly incomplete) leafs layer.
if content_type: If len(leafs) < nb_leafs, it will be padded with the padding repeated as many times
fs.append( as needed to have nb_leafs in total.
{'length': size, 'path': p2, 'content_type': content_type} """
) # HEREDAVE. bad for batch! if not in_place:
leafs = copy.copy(leafs)
while nb_leafs > 1:
nb_leafs = nb_leafs // 2
for i in range(nb_leafs):
node1 = leafs[2 * i] if 2 * i < len(leafs) else padding
node2 = leafs[2 * i + 1] if 2 * i + 1 < len(leafs) else padding
h = sha256(node1)
h.update(node2)
if i < len(leafs):
leafs[i] = h.digest()
else: else:
fs.append({'length': size, 'path': p2}) leafs.append(h.digest())
with open(f, 'rb') as file_: return leafs[0] if leafs else padding
while pos < size:
a = min(size - pos, piece_length - done)
sh.update(file_.read(a))
done += a
pos += a
totalhashed += a
if done == piece_length:
pieces.append(sh.digest())
piece_count += 1
done = 0
sh = sha()
progress(piece_count, num_pieces)
if done > 0:
pieces.append(sh.digest())
piece_count += 1
progress(piece_count, num_pieces)
def _sha256_buffer_blocks(buffer, block_len):
import math
nb_blocks = math.ceil(len(buffer) / block_len)
blocks = [
sha256(buffer[i * block_len : (i + 1) * block_len]).digest()
for i in range(nb_blocks)
]
return blocks
def makeinfo_lt(
    path, piece_length, name=None, private=False, torrent_format=TorrentFormat.V1
):
    """
    Make the info dict via the libtorrent library.

    Args:
        path: file or directory to build the torrent from.
        piece_length: piece size handed to ``lt.create_torrent``.
        name: top-level name used when ``path`` is a directory; defaults to
            the last component of ``path``.
        private: value passed to ``set_priv`` on the created torrent.
        torrent_format: TorrentFormat selecting the libtorrent creation flags.

    Returns:
        Tuple ``(info, piece_layers)`` taken from the generated torrent;
        ``piece_layers`` is None when the b'piece layers' key is absent.
    """
    from deluge._libtorrent import lt

    # Normalise like makeinfo() does: a relative path or a trailing separator
    # would otherwise give an empty name or an empty hashing root directory.
    path = os.path.abspath(path)

    if not name:
        name = os.path.split(path)[1]

    fs = lt.file_storage()
    if os.path.isfile(path):
        lt.add_files(fs, path)
    else:
        for p, f in subfiles(path):
            fs.add_file(os.path.join(name, *p), os.path.getsize(f))
    torrent = lt.create_torrent(
        fs, piece_size=piece_length, flags=torrent_format.to_lt_flag()
    )

    # Hash relative to the parent directory of ``path``.
    lt.set_piece_hashes(torrent, os.path.dirname(path))
    torrent.set_priv(private)

    t = torrent.generate()
    info = t[b'info']
    pieces_layers = t.get(b'piece layers', None)

    return info, pieces_layers
def makeinfo(
    path,
    piece_length,
    progress,
    name=None,
    content_type=None,
    private=False,
    torrent_format=TorrentFormat.V1,
):
    """Build the torrent ``info`` dict (and v2 piece layers) for ``path``.

    Files are always read in piece-sized chunks starting at the beginning of
    each file. For v1 metadata the chunk is re-sliced so that the running
    SHA-1 piece may span file boundaries; for v2 metadata every chunk becomes
    one merkle root built from its 16 KiB block hashes.

    Args:
        path: file or directory to hash.
        piece_length: piece size in bytes.
        progress: callable invoked as ``progress(bytes_hashed, total_bytes)``,
            at most once per ``piece_length`` bytes hashed plus once at the end.
        name: torrent name, used only when ``path`` is a directory; defaults
            to the last path component.
        content_type: optional value stored under b'content_type'.
        private: when truthy, b'private' is set to 1.
        torrent_format: TorrentFormat selecting v1, v2 or hybrid metadata.

    Returns:
        Tuple ``(info, piece_layers)``; ``piece_layers`` is None unless v2
        metadata was requested.

    Raises:
        ValueError: for v2/hybrid when ``piece_length`` is not a power of two
            or is smaller than 16 KiB.
    """
    # HEREDAVE. If path is directory, how do we assign content type?

    v2_block_len = 2**14  # 16 KiB
    v2_blocks_per_piece = 1
    v2_block_padding = b''
    v2_piece_padding = b''
    if torrent_format.includes_v2():
        if _next_pow2(piece_length) != piece_length or piece_length < v2_block_len:
            raise ValueError(
                'Bittorrent v2 piece size must be a power of 2; and bigger than 16 KiB'
            )

        v2_blocks_per_piece = piece_length // v2_block_len
        v2_block_padding = bytes(32)  # 32 = size of sha256 in bytes
        # Root of a piece made only of padding blocks; pads the piece layer.
        v2_piece_padding = _sha256_merkle_root(
            [], nb_leafs=v2_blocks_per_piece, padding=v2_block_padding
        )

    path = os.path.abspath(path)
    files = []
    pieces = []
    file_tree = {}
    piece_layers = {}
    # A directory is always described with a `files` list, even when it holds
    # a single file. Deciding from len(subs) would store `length = None` for
    # such a directory and drop its only file entry.
    is_multi_file = os.path.isdir(path)
    if is_multi_file:
        if not name:
            name = os.path.split(path)[1]
        subs = subfiles(path)
        if torrent_format.includes_v2():
            subs = sorted(subs)
        length = None
        totalsize = 0.0
        for p, f in subs:
            totalsize += os.path.getsize(f)
    else:
        name = os.path.split(path)[1]
        subs = [([name], path)]
        length = os.path.getsize(path)
        totalsize = length
    sh = sha()
    done = 0  # bytes already fed into the current (unfinished) v1 piece
    totalhashed = 0

    next_progress_event = piece_length
    for p, f in subs:
        file_pieces_v2 = []
        pos = 0
        size = os.path.getsize(f)
        p2 = [n.encode('utf8') for n in p]
        if content_type:
            files.append(
                {b'length': size, b'path': p2, b'content_type': content_type}
            )  # HEREDAVE. bad for batch!
        else:
            files.append({b'length': size, b'path': p2})
        with open(f, 'rb') as file_:
            while pos < size:
                to_read = min(size - pos, piece_length)
                buffer = memoryview(file_.read(to_read))
                pos += to_read

                if torrent_format.includes_v1():
                    # The chunk may straddle a v1 piece boundary: the first
                    # slice completes the pending piece, the rest starts a new one.
                    a = piece_length - done
                    for sub_buffer in (buffer[:a], buffer[a:]):
                        if sub_buffer:
                            sh.update(sub_buffer)
                            done += len(sub_buffer)
                            if done == piece_length:
                                pieces.append(sh.digest())
                                done = 0
                                sh = sha()
                if torrent_format.includes_v2():
                    block_hashes = _sha256_buffer_blocks(buffer, v2_block_len)
                    num_leafs = v2_blocks_per_piece
                    if size <= piece_length:
                        # The special case when the file is smaller than a piece: only pad till the next power of 2
                        num_leafs = _next_pow2(len(block_hashes))
                    root = _sha256_merkle_root(
                        block_hashes, num_leafs, v2_block_padding, in_place=True
                    )
                    file_pieces_v2.append(root)

                totalhashed += to_read
                if totalhashed >= next_progress_event:
                    # Throttle: at most one event per piece_length bytes hashed.
                    next_progress_event = totalhashed + piece_length
                    progress(totalhashed, totalsize)

        if torrent_format == TorrentFormat.HYBRID and is_multi_file and done > 0:
            # Add padding file to force piece-alignment
            padding = piece_length - done
            sh.update(bytes(padding))
            files.append(
                {
                    b'length': padding,
                    b'attr': b'p',
                    b'path': [b'.pad', str(padding).encode()],
                }
            )
            pieces.append(sh.digest())
            done = 0
            sh = sha()

        if torrent_format.includes_v2():
            # add file to the `file tree` and, if needed, to the `piece layers` structures
            # NOTE(review): a zero-length file gets the padding root here; BEP 52
            # appears to define `pieces root` only for non-empty files - confirm.
            pieces_root = _sha256_merkle_root(
                file_pieces_v2,
                _next_pow2(len(file_pieces_v2)),
                v2_piece_padding,
                in_place=False,
            )
            dst_directory = file_tree
            for directory in p2[:-1]:
                dst_directory = dst_directory.setdefault(directory, {})
            dst_directory[p2[-1]] = {
                b'': {
                    b'length': size,
                    b'pieces root': pieces_root,
                }
            }
            # Files that fit in a single piece have no piece layer entry.
            if len(file_pieces_v2) > 1:
                piece_layers[pieces_root] = b''.join(file_pieces_v2)

    if done > 0:
        # Flush the trailing, partially filled v1 piece.
        pieces.append(sh.digest())
    progress(totalsize, totalsize)

    info = {
        b'piece length': piece_length,
        b'name': name.encode('utf8'),
    }
    if private:
        info[b'private'] = 1
    if content_type:
        info[b'content_type'] = content_type
    if torrent_format.includes_v1():
        info[b'pieces'] = b''.join(pieces)
        if is_multi_file:
            info[b'files'] = files
        else:
            info[b'length'] = length
    if torrent_format.includes_v2():
        info.update(
            {
                b'meta version': 2,
                b'file tree': file_tree,
            }
        )
    return info, (piece_layers if torrent_format.includes_v2() else None)
def subfiles(d): def subfiles(d):

View file

@ -7,7 +7,13 @@
import os import os
import tempfile import tempfile
import pytest
from deluge import metafile from deluge import metafile
from deluge._libtorrent import LT_VERSION
from deluge.common import VersionSplit
from . import common
def check_torrent(filename): def check_torrent(filename):
@ -55,3 +61,52 @@ class TestMetafile:
metafile.make_meta_file(tmp_data, '', 32768, target=tmp_torrent) metafile.make_meta_file(tmp_data, '', 32768, target=tmp_torrent)
check_torrent(tmp_torrent) check_torrent(tmp_torrent)
@pytest.mark.parametrize(
    'path',
    [
        common.get_test_data_file('deluge.png'),
        common.get_test_data_file('unicode_filenames.torrent'),
        os.path.dirname(common.get_test_data_file('deluge.png')),
    ],
)
@pytest.mark.parametrize(
    'torrent_format',
    [
        metafile.TorrentFormat.V1,
        metafile.TorrentFormat.V2,
        metafile.TorrentFormat.HYBRID,
    ],
)
@pytest.mark.parametrize('piece_length', [2**14, 2**15, 2**16])
@pytest.mark.parametrize('private', [True, False])
def test_create_info(self, path, torrent_format, piece_length, private):
    """Check that our own info dict matches the one generated by libtorrent."""
    our_info, our_piece_layers = metafile.makeinfo(
        path,
        piece_length,
        metafile.dummy,
        private=private,
        torrent_format=torrent_format,
    )
    lt_info, lt_piece_layers = metafile.makeinfo_lt(
        path,
        piece_length,
        private=private,
        torrent_format=torrent_format,
    )

    if (
        torrent_format == metafile.TorrentFormat.HYBRID
        and os.path.isdir(path)
        and VersionSplit(LT_VERSION) <= VersionSplit('2.0.7.0')
    ):
        # Libtorrent didn't correctly follow the standard until version 2.0.7 included
        # https://github.com/arvidn/libtorrent/commit/74d82a0cd7c2e9e3c4294901d7eb65e247050df4
        # If last file is a padding, ignore that file and the last piece.
        if our_info[b'files'][-1][b'path'][0] == b'.pad':
            # The v1 `pieces` string is a concatenation of SHA-1 digests, so
            # exactly one piece is 20 bytes (32 would also cut into the
            # previous piece's hash).
            sha1_digest_len = 20
            our_info[b'files'] = our_info[b'files'][:-1]
            our_info[b'pieces'] = our_info[b'pieces'][:-sha1_digest_len]
            lt_info[b'pieces'] = lt_info[b'pieces'][:-sha1_digest_len]

    assert our_info == lt_info
    assert our_piece_layers == lt_piece_layers