#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022/8/31 23:34
# @Author : old tom
# @File : http_downloader.py
# @Project : Futool
# @Desc : File downloader
import time
import urllib.request as req
from multiprocessing import Pool

from futool.http.http_request import head

# Browser-like User-Agent sent with every part request.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.70"
)


def _download_part(task):
    """
    Fetch one byte range of a URL and store it in its own part file.

    Defined at module level (not as a closure) so that it can be pickled
    and shipped to the workers of a ``multiprocessing`` pool.

    :param task: tuple ``(url, dst, start, end)``; ``start`` and ``end`` are
        zero-based, inclusive byte positions
    :return: None
    """
    url, dst, start, end = task
    request = req.Request(
        url=url,
        headers={
            "User-Agent": _USER_AGENT,
            # HTTP byte-range syntax is "bytes=first-last" (both inclusive).
            "Range": f"bytes={start}-{end}",
        },
        method="GET",
    )
    # The part file is named after its starting offset: unique per part and
    # sortable, so the parts can later be reassembled in the right order.
    with req.urlopen(request) as response, open(f"{dst}.{start}", "wb") as part_file:
        part_file.write(response.read())


class HttpDownloader(object):
    """
    HTTP downloader that fetches a resource in byte-range parts through a
    worker pool.

    NOTE(review): the pool is closed once the parts of a ranged download have
    been fetched, so an instance cannot run a second ranged download.
    """

    def __init__(self, pool=None):
        """
        :param pool: worker pool exposing ``map``/``close``/``join``; a
            16-worker ``multiprocessing.Pool`` is created when omitted
        """
        self.pool = Pool(16) if not pool else pool

    def download(self, url, dst, chunk_size=1000):
        """
        Download ``url`` in parts of ``chunk_size`` bytes when the server
        supports range requests.

        :param url: download link
        :param dst: save path; each part is written to ``<dst>.<start offset>``
        :param chunk_size: size of one part in bytes
        :return: None
        """
        is_support, content_length = HttpDownloader.is_support_range(url)
        if is_support:
            # Inclusive byte range handled by each worker.
            offset = self.fork(int(content_length), chunk_size)
            self.__join(offset, url, dst)
        else:
            # TODO: the single-threaded fallback announced by this message is
            # not implemented yet; nothing is downloaded on this path.
            print('无法获取Content-Length,使用单线程下载')

    @staticmethod
    def is_support_range(url):
        """
        Check via a HEAD request whether the server supports range requests.

        :param url: download link
        :return: ``(True, <Content-Length header value>)`` when both
            ``Accept-Ranges`` (other than ``none``) and ``Content-Length`` are
            present in the response headers, otherwise ``(False, 0)``
        """
        wrapper = head(url)
        header = wrapper.header()
        h_keys = header.keys()
        # NOTE(review): the lookup uses exact header-name casing; confirm that
        # the mapping returned by header() matches it.
        if 'Accept-Ranges' not in h_keys or 'Content-Length' not in h_keys:
            return False, 0
        if header['Accept-Ranges'] == 'none':
            return False, 0
        return True, header['Content-Length']

    @staticmethod
    def fork(content_length: int, chunk_size):
        """
        Split ``content_length`` bytes into consecutive byte ranges.

        The ranges are zero-based and inclusive on both ends, as required by
        the HTTP ``Range`` header. They do not overlap and together cover
        exactly bytes ``0 .. content_length - 1``.

        :param content_length: total size of the resource in bytes
        :param chunk_size: maximum size of one range in bytes
        :return: list of ``(start, end)`` tuples; empty when
            ``content_length`` is not positive
        :raises ValueError: if ``chunk_size`` is not positive
        """
        if chunk_size <= 0:
            raise ValueError('chunk_size must be a positive integer')
        last_byte = content_length - 1
        return [
            (start, min(start + chunk_size - 1, last_byte))
            for start in range(0, content_length, chunk_size)
        ]

    def __join(self, offset, url, dst):
        """
        Download every byte range through the pool and wait for completion.

        :param offset: list of inclusive ``(start, end)`` byte ranges
        :param url: download link
        :param dst: save path used as the prefix of the part files
        :return: None
        """
        # Plain tuples keep the tasks picklable for process-based pools.
        tasks = [(url, dst, start, end) for start, end in offset]
        try:
            self.pool.map(_download_part, tasks)
        finally:
            # Always release the workers, even when a part failed.
            self.pool.close()
            self.pool.join()
        # TODO: the part files are not merged into ``dst`` yet.