#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022/8/31 23:34
# @Author : old tom
# @File : http_downloader.py
# @Project : Futool
# @Desc : file downloader
from futool.http.http_request import head
# a thread pool is used so the worker closure in __join can be passed to map()
from multiprocessing.pool import ThreadPool
import urllib.request as req


class HttpDownloader(object):
    """
    HTTP downloader that splits a file into byte ranges and fetches them concurrently.
    """

    def __init__(self, pool=None):
        # default to a 16-worker thread pool unless the caller supplies one
        self.pool = pool if pool else ThreadPool(16)
    def download(self, url, dst, chunk_size=1000):
        """
        Download a file, using multiple threads automatically when the server
        supports Range requests.
        :param url: download URL
        :param dst: destination path
        :param chunk_size: chunk size in bytes
        :return:
        """
        is_support, content_length = HttpDownloader.is_support_range(url)
        if is_support:
            # byte ranges assigned to each worker thread
            offset = self.fork(int(content_length), chunk_size)
            self.__join(offset, url, dst)
        else:
            print('Content-Length unavailable, falling back to a single-threaded download')
            # single plain GET when range requests are not supported
            response = req.urlopen(url)
            with open(dst, 'wb') as f:
                f.write(response.read())
    @staticmethod
    def is_support_range(url):
        """
        Check whether the server supports Range requests.
        :return: (supported, content length)
        """
        wrapper = head(url)
        header = wrapper.header()
        h_keys = header.keys()
        if 'Accept-Ranges' in h_keys and 'Content-Length' in h_keys and header['Accept-Ranges'] != 'none':
            return True, header['Content-Length']
        else:
            return False, 0
    @staticmethod
    def fork(content_length: int, chunk_size):
        """
        Split the download into byte ranges, one per worker.
        :param chunk_size: chunk size in bytes
        :param content_length: total size reported by the server
        :return: list of inclusive (start, end) byte offsets
        """
        offset = []
        # HTTP ranges are inclusive, so the last valid byte is content_length - 1
        for start in range(0, content_length, chunk_size):
            end = min(start + chunk_size - 1, content_length - 1)
            offset.append((start, end))
        return offset
    def __join(self, offset, url, dst):
        """
        Download all byte ranges concurrently and merge them into dst.
        :param offset: list of inclusive (start, end) byte ranges
        :param url: download URL
        :param dst: destination path
        :return:
        """

        def download_by_thread(part):
            _request = req.Request(url=url, headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.70",
                # the Range header uses '=' between the unit and the byte range
                'Range': f'bytes={part[0]}-{part[1]}'
            }, method='GET')
            response = req.urlopen(_request)
            return response.read()

        # map() preserves input order, so the parts can be written out sequentially
        parts = self.pool.map(download_by_thread, offset)
        self.pool.close()
        self.pool.join()
        with open(dst, 'wb') as f:
            for data in parts:
                f.write(data)
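

# Minimal usage sketch (an assumption, not part of the original module): the URL and
# destination path below are placeholders, and futool.http.http_request.head must be
# importable for is_support_range() to work.
if __name__ == '__main__':
    downloader = HttpDownloader()
    # use 1 MiB chunks instead of the 1000-byte default to limit the number of range requests
    downloader.download('https://example.com/archive.zip', '/tmp/archive.zip', chunk_size=1024 * 1024)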