You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
95 lines
2.9 KiB
95 lines
2.9 KiB
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# @Time : 2022/8/31 23:34
|
|
# @Author : old tom
|
|
# @File : http_downloader.py
|
|
# @Project : Futool
|
|
# @Desc : 文件下载器
|
|
import time
|
|
|
|
from futool.http.http_request import head
|
|
from multiprocessing import Pool
|
|
import urllib.request as req
|
|
|
|
|
|
class HttpDownloader(object):
    """
    HTTP file downloader.

    When the server advertises byte-range support the file is split into
    fixed-size ranges that are fetched in parallel by a worker pool and
    written straight into the destination file at their own offsets.
    Otherwise the file is fetched with one plain GET request.

    NOTE: the pool is closed after a ranged download finishes, so an
    instance can perform one ranged download only.
    """

    # Browser-like User-Agent sent with every GET issued by this class.
    _USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.70"
    )

    def __init__(self, pool=None):
        """
        :param pool: optional pool object exposing ``map``, ``close`` and
            ``join`` (e.g. ``multiprocessing.Pool``); a 16-worker
            ``multiprocessing.Pool`` is created when omitted
        """
        # Compare against None explicitly instead of relying on the
        # truthiness of an arbitrary pool object.
        self.pool = Pool(16) if pool is None else pool

    def download(self, url, dst, chunk_size=1000):
        """
        Download ``url`` to ``dst``, in parallel when the server allows it.

        :param url: download link
        :param dst: path the file is saved to
        :param chunk_size: size in bytes of each range fetched by one worker
        :return: None
        """
        is_support, content_length = HttpDownloader.is_support_range(url)
        if is_support:
            # Inclusive byte range assigned to each worker.
            offset = self.fork(int(content_length), chunk_size)
            self.__join(offset, url, dst)
        else:
            print('无法获取Content-Length,使用单线程下载')
            # The message above announces a single-stream download, so
            # actually perform it instead of silently doing nothing.
            HttpDownloader._download_single(url, dst)

    @staticmethod
    def is_support_range(url):
        """
        Check whether the server supports range requests for ``url``.

        Presumably issues a HEAD request through the project helper
        ``head`` and inspects the response headers (verify against
        ``futool.http.http_request``).

        :param url: link to probe
        :return: ``(True, <Content-Length header value>)`` when both
            ``Accept-Ranges`` and ``Content-Length`` are present and
            ``Accept-Ranges`` is not ``'none'``; ``(False, 0)`` otherwise
        """
        wrapper = head(url)
        header = wrapper.header()
        h_keys = header.keys()
        if 'Accept-Ranges' in h_keys and 'Content-Length' in h_keys and header['Accept-Ranges'] != 'none':
            return True, header['Content-Length']
        return False, 0

    @staticmethod
    def fork(content_length: int, chunk_size):
        """
        Split ``content_length`` bytes into consecutive inclusive ranges.

        The ranges never overlap, never exceed ``content_length - 1`` and
        together cover every byte exactly once, which is what an HTTP
        ``Range: bytes=start-end`` header (inclusive on both ends) needs.

        :param content_length: total size of the resource in bytes
        :param chunk_size: maximum number of bytes per range
        :return: list of ``(start, end)`` tuples; empty when
            ``content_length`` is not positive
        :raises ValueError: if ``chunk_size`` is not positive
        """
        if chunk_size <= 0:
            raise ValueError('chunk_size must be a positive integer')
        return [
            # The last range is clamped to the final valid byte index.
            (start, min(start + chunk_size, content_length) - 1)
            for start in range(0, content_length, chunk_size)
        ]

    @staticmethod
    def _fetch_range(task):
        """
        Fetch one byte range and write it into the destination file.

        Defined as a static method (not a nested closure) so it can be
        pickled and shipped to ``multiprocessing`` worker processes.

        :param task: ``(url, dst, start, end)`` with inclusive offsets
        :raises OSError: if the server does not answer with 206 Partial
            Content, because writing a full body at ``start`` would
            corrupt the output
        """
        url, dst, start, end = task
        _request = req.Request(url=url, headers={
            "User-Agent": HttpDownloader._USER_AGENT,
            # Range syntax is "bytes=first-last", both ends inclusive.
            'Range': f'bytes={start}-{end}',
        }, method='GET')
        with req.urlopen(_request) as response:
            if response.status != 206:
                raise OSError(
                    f'expected 206 Partial Content for bytes={start}-{end}, got {response.status}'
                )
            data = response.read()
        # 'r+b' keeps what other workers have already written; the file
        # is created by the caller before any worker starts.
        with open(dst, 'r+b') as f:
            f.seek(start)
            f.write(data)

    @staticmethod
    def _download_single(url, dst, block_size=64 * 1024):
        """
        Download ``url`` to ``dst`` with one GET request, streaming the
        body in blocks so the whole file is never held in memory.

        :param url: download link
        :param dst: path the file is saved to
        :param block_size: number of bytes read per iteration
        """
        _request = req.Request(url=url, headers={
            "User-Agent": HttpDownloader._USER_AGENT,
        }, method='GET')
        with req.urlopen(_request) as response, open(dst, 'wb') as f:
            while True:
                block = response.read(block_size)
                if not block:
                    break
                f.write(block)

    def __join(self, offset, url, dst):
        """
        Download every range in ``offset`` through the pool into ``dst``.

        :param offset: list of inclusive ``(start, end)`` byte ranges
        :param url: download link
        :param dst: path the file is saved to
        :return: None
        """
        # Create (or truncate) the destination up front so every worker
        # can open it in 'r+b' and write at its own offset.
        with open(dst, 'wb'):
            pass
        tasks = [(url, dst, start, end) for start, end in offset]
        try:
            self.pool.map(HttpDownloader._fetch_range, tasks)
        finally:
            # Release the workers even when a range download failed.
            self.pool.close()
            self.pool.join()
|