折腾:
【未解决】Python给requests下载文件加速提升速度
期间,顺带参考
去加上:
在下载期间,就实时统计下载速度:从开始下载到当前已下载的最新平均速度
以及后来想到了,可以用 当前chunk的字节数 / 下载该chunk所用的时间,得到当前chunk的实时(瞬时)下载速度
当前代码稍加调整后是:
src/common/DownloadApps.py
def download(self, task):
    """Download one app to ``filepath``, resuming a partial file, and log progress.

    Only part of the method is shown in the post; ``url``, ``filepath``,
    ``appname`` and ``downloadCalcTimeKey`` are presumably prepared from
    ``task`` in the omitted part (TODO confirm against the full file).
    """
    # ... (beginning of the method omitted in the original post)
    totalSizeStr = ""
    total_size = None
    try:
        # Probe request, used only to read Content-Length. The context manager
        # closes the streamed connection instead of leaving it open until GC.
        with requests.get(url, stream=True) as r1:
            total_size = int(r1.headers['Content-Length']) # 447681304
        totalSizeStr = CommonUtils.formatSize(total_size)
        logging.info("app total size: %s", totalSizeStr)
    except (requests.RequestException, KeyError, ValueError):
        # Request failed, header missing, or header not an integer:
        # carry on without a known total size.
        total_size = None

    # Resume from whatever is already on disk.
    temp_size = os.path.getsize(filepath) if os.path.exists(filepath) else 0
    headers = {
        'Range': 'bytes=%d-' % temp_size,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
    }
    # ChunkSize = 1024*1024*10 # 10MB
    ChunkSize = 1024*1024*20 # 20MB
    with requests.get(url, stream=True, headers=headers) as r, open(filepath, "ab") as f:
        for chunk in r.iter_content(chunk_size=ChunkSize):
            if not chunk:
                continue
            temp_size += len(chunk)
            f.write(chunk)
            f.flush()
            # `if total_size` (not `is not None`) also avoids dividing by a
            # reported Content-Length of 0.
            ratio = round(100 * temp_size / total_size, 2) if total_size else "unknown"
            tempSizeStr = CommonUtils.formatSize(temp_size)
            logging.info("download {0} {1}/{2} {3}%".format(appname, tempSizeStr, totalSizeStr, ratio))

    downloadTimeSecondsFloat = CommonUtils.calcTimeEnd(downloadCalcTimeKey) # 259.60212874412537
    downloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(downloadTimeSecondsFloat) # {'days': 0, 'hours': 0, 'microseconds': 128, 'millseconds': 602, 'minutes': 4, 'seconds': 19}
    # Drop the sub-second parts so the time is rendered as HH:MM:SS.
    del downloadTimeDict["millseconds"]
    del downloadTimeDict["microseconds"]
    downloadTimeStr = CommonUtils.datetimeDictToStr(downloadTimeDict) # '00:04:19'
    if total_size:
        totalSizeStr = CommonUtils.formatSize(total_size) # '426.9MB'
        speedFloat = total_size / downloadTimeSecondsFloat # 1724490.1117172781
        speedStr = "%s/s" % CommonUtils.formatSize(speedFloat) # '1.6MB/s'
    else:
        totalSizeStr = "Unknown"
        speedStr = "Unknown"
    logging.info("download %s end, size=%s, time=%s, speed=%s", appname, totalSizeStr, downloadTimeStr, speedStr)
继续再去优化,加上瞬时,实时下载速度
# Draft of the chunk loop that additionally reports the instantaneous speed.
# ChunkSize = 1024*1024*10 # 10MB
ChunkSize = 1024*1024*20 # 20MB
with open(filepath, "ab") as f:
    startTime = time.time()
    prevTime = startTime
    for chunk in r.iter_content(chunk_size=ChunkSize):
        if chunk:
            curTime = time.time()
            chunkSize = len(chunk)
            downloadedSize += chunkSize
            f.write(chunk)
            f.flush()

            if total_size is not None:
                downloadedPercent = round(100 * downloadedSize / total_size, 2)
            else:
                downloadedPercent = "unknown"
            downloadedSizeStr = CommonUtils.formatSize(downloadedSize)

            # Instantaneous speed = bytes in this chunk / time spent on this chunk.
            # `chunk` itself is a bytes object and cannot be divided, so its
            # length must be used here.
            curChunkTime = curTime - prevTime
            instantSpeed = chunkSize / curChunkTime
            instantSpeedStr = CommonUtils.formatSize(instantSpeed)

            # Average speed = everything counted so far / time since the loop started.
            curDownloadedTime = curTime - startTime
            averageSpeed = downloadedSize / curDownloadedTime
            averageSpeedStr = CommonUtils.formatSize(averageSpeed)

            logging.info("download %s size: %s/%s %s%%, speed: avg=%s now=%s",
                appname, downloadedSizeStr, totalSizeStr, downloadedPercent, averageSpeedStr, instantSpeedStr)

            prevTime = curTime
抽空去调试看看
【总结】
优化后代码:
# Ask the server only for the bytes that are not on disk yet.
headers = {
    'Range': 'bytes=%d-' % totalDownloadedSize,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}
r = requests.get(url, stream=True, headers=headers)
ChunkSize = 1024*1024*10 # 10MB
# ChunkSize = 1024*1024*20 # 20MB
with open(filepath, "ab") as f:
    startTime = time.time()
    prevTime = startTime
    for chunkBytes in r.iter_content(chunk_size=ChunkSize):
        if not chunkBytes:
            continue

        curDownloadedSize = len(chunkBytes) # 10485760
        curTime = time.time() # 1606456020.0718982
        totalDownloadedSize += curDownloadedSize # 10485760
        f.write(chunkBytes)
        f.flush()

        # Progress so far, as a percentage when the total size is known.
        downloadedPercent = (
            round(100 * totalDownloadedSize / total_size, 2) # 0.7
            if total_size is not None
            else "Unknown"
        )
        downloadedSizeStr = CommonUtils.formatSize(totalDownloadedSize) # '10.0MB'

        # Instantaneous speed: this chunk's bytes over this chunk's duration.
        curDownloadTime = curTime - prevTime # 15.63818907737732
        instantSpeed = curDownloadedSize / curDownloadTime # 670522.651191692
        instantSpeedStr = CommonUtils.formatSize(instantSpeed) # '654.8KB'

        # Average speed: total counted bytes over time since the loop started.
        totalDownloadTime = curTime - startTime # 15.63818907737732
        averageSpeed = totalDownloadedSize / totalDownloadTime # 670522.651191692
        averageSpeedStr = CommonUtils.formatSize(averageSpeed) # '654.8KB'

        logging.info(
            "download %s size: %s/%s %s%%, speed: avg=%s now=%s",
            appname,
            downloadedSizeStr,
            totalSizeStr,
            downloadedPercent,
            averageSpeedStr,
            instantSpeedStr,
        )

        prevTime = curTime
# ... (rest of the method omitted in the original post)
多轮调试后,输出:
[201127 13:46:44][DownloadApps.py 86 ] start to download fknsg2/放开那三国2,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201027/202010271738254e7s6.apk [201127 13:46:44][DownloadApps.py 98 ] app total size: 1.4GB [201127 13:48:12][DownloadApps.py 136] download fknsg2/放开那三国2 size: 10.0MB/1.4GB 0.7%, speed: avg=654.8KB now=654.8KB [201127 13:52:39][DownloadApps.py 136] download fknsg2/放开那三国2 size: 20.0MB/1.4GB 1.4%, speed: avg=190.3KB now=111.4KB [201127 13:53:05][DownloadApps.py 136] download fknsg2/放开那三国2 size: 30.0MB/1.4GB 2.11%, speed: avg=82.5KB now=38.6KB [201127 13:53:12][DownloadApps.py 136] download fknsg2/放开那三国2 size: 40.0MB/1.4GB 2.81%, speed: avg=105.5KB now=659.6KB [201127 13:53:21][DownloadApps.py 136] download fknsg2/放开那三国2 size: 50.0MB/1.4GB 3.51%, speed: avg=129.0KB now=1.1MB [201127 13:53:30][DownloadApps.py 136] download fknsg2/放开那三国2 size: 60.0MB/1.4GB 4.21%, speed: avg=151.5KB now=1.1MB [201127 13:53:36][DownloadApps.py 136] download fknsg2/放开那三国2 size: 70.0MB/1.4GB 4.91%, speed: avg=174.0KB now=1.6MB [201127 13:53:43][DownloadApps.py 136] download fknsg2/放开那三国2 size: 80.0MB/1.4GB 5.62%, speed: avg=195.5KB now=1.4MB [201127 13:53:49][DownloadApps.py 136] download fknsg2/放开那三国2 size: 90.0MB/1.4GB 6.32%, speed: avg=216.7KB now=1.6MB [201127 13:53:55][DownloadApps.py 136] download fknsg2/放开那三国2 size: 100.0MB/1.4GB 7.02%, speed: avg=237.5KB now=1.7MB
就正常了。
不过平均速度很慢。
突然发现是调试导致的:断点暂停的时间也被算进了下载耗时,所以算出来的速度偏低。
重新运行,不加断点,试试
果然正常了:
[201127 13:56:11][DownloadApps.py 86 ] start to download fknsg2/放开那三国2,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201027/202010271738254e7s6.apk [201127 13:56:11][DownloadApps.py 98 ] app total size: 1.4GB [201127 13:56:18][DownloadApps.py 136] download fknsg2/放开那三国2 size: 10.0MB/1.4GB 0.7%, speed: avg=1.5MB now=1.5MB [201127 13:56:24][DownloadApps.py 136] download fknsg2/放开那三国2 size: 20.0MB/1.4GB 1.4%, speed: avg=1.6MB now=1.7MB [201127 13:56:30][DownloadApps.py 136] download fknsg2/放开那三国2 size: 30.0MB/1.4GB 2.11%, speed: avg=1.6MB now=1.5MB [201127 13:56:39][DownloadApps.py 136] download fknsg2/放开那三国2 size: 40.0MB/1.4GB 2.81%, speed: avg=1.4MB now=1.2MB [201127 13:56:46][DownloadApps.py 136] download fknsg2/放开那三国2 size: 50.0MB/1.4GB 3.51%, speed: avg=1.5MB now=1.5MB [201127 13:56:52][DownloadApps.py 136] download fknsg2/放开那三国2 size: 60.0MB/1.4GB 4.21%, speed: avg=1.5MB now=1.6MB [201127 13:57:00][DownloadApps.py 136] download fknsg2/放开那三国2 size: 70.0MB/1.4GB 4.91%, speed: avg=1.4MB now=1.2MB [201127 13:57:08][DownloadApps.py 136] download fknsg2/放开那三国2 size: 80.0MB/1.4GB 5.62%, speed: avg=1.4MB now=1.2MB
速度符合预期。
为了方便查看已下载比例,调整参数位置:
# Speeds come first and the size/percentage last, so the downloaded ratio
# is always at the end of each progress line.
logging.info(
    "download %s speed: avg=%s now=%s, size: %s/%s %s%%",
    appname,
    averageSpeedStr,
    instantSpeedStr,
    downloadedSizeStr,
    totalSizeStr,
    downloadedPercent,
)
输出:
再去加个总时长吧
# Time since the chunk loop started; used for the average speed and shown
# in the progress line as the elapsed time.
totalDownloadTime = curTime - startTime # 15.63818907737732
averageSpeed = totalDownloadedSize / totalDownloadTime # 670522.651191692
averageSpeedStr = CommonUtils.formatSize(averageSpeed) # '654.8KB'
totalDownloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(totalDownloadTime)
totalDownloadTimeStr = CommonUtils.datetimeDictToStr(totalDownloadTimeDict, isShowMilliSecPart=False)
# The label is "cur=", matching the log output this code produced.
logging.info("download %s speed: cur=%s/s, time: total=%s, size: %s %s%%",
    appname, instantSpeedStr, totalDownloadTimeStr, downloadedSizeStr, downloadedPercent)
效果:
[201127 14:26:33][DownloadApps.py 86 ] start to download gcld/攻城掠地,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201026/202010261519026im2p.apk [201127 14:26:33][DownloadApps.py 98 ] app total size: 596.4MB [201127 14:26:39][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:05, size: 10.0MB 1.68% [201127 14:26:45][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:11, size: 20.0MB 3.35% [201127 14:26:51][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:17, size: 30.0MB 5.03%
即可。
【总结】
此处最后代码:
def download(self, task):
    """Download one app to ``filepath`` with resume support and live speed logging.

    For every received chunk a progress line is logged with the current
    (per-chunk) speed, the elapsed time, the size on disk and the percentage.
    After the loop a summary line with size, duration and speed is logged.

    Only part of the method is shown in the post; ``url``, ``filepath``,
    ``appname`` and ``downloadCalcTimeKey`` are presumably prepared from
    ``task`` in the omitted part (TODO confirm against the full file).
    """
    # ... (beginning of the method omitted in the original post)
    totalSizeStr = ""
    total_size = None
    try:
        # Probe request, used only to read Content-Length. The context manager
        # closes the streamed connection instead of leaving it open until GC.
        with requests.get(url, stream=True) as r1:
            total_size = int(r1.headers['Content-Length']) # 447681304
        totalSizeStr = CommonUtils.formatSize(total_size)
        logging.info("app total size: %s", totalSizeStr)
    except (requests.RequestException, KeyError, ValueError):
        # Request failed, header missing, or header not an integer.
        logging.warning("Fail to get total size 'Content-Length' from %s", url)
        total_size = None

    # Resume from whatever is already on disk.
    totalDownloadedSize = os.path.getsize(filepath) if os.path.exists(filepath) else 0
    headers = {
        'Range': 'bytes=%d-' % totalDownloadedSize,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
    }
    ChunkSize = 1024*1024*10 # 10MB
    # ChunkSize = 1024*1024*20 # 20MB
    with requests.get(url, stream=True, headers=headers) as r, open(filepath, "ab") as f:
        startTime = time.time()
        prevTime = startTime
        for chunkBytes in r.iter_content(chunk_size=ChunkSize):
            if not chunkBytes:
                continue

            curDownloadedSize = len(chunkBytes) # 10485760
            curTime = time.time() # 1606456020.0718982
            totalDownloadedSize += curDownloadedSize # 10485760
            f.write(chunkBytes)
            f.flush()

            # `if total_size` (not `is not None`) also avoids dividing by a
            # reported Content-Length of 0.
            if total_size:
                downloadedPercent = round(100 * totalDownloadedSize / total_size, 2) # 0.7
            else:
                downloadedPercent = "Unknown"
            downloadedSizeStr = CommonUtils.formatSize(totalDownloadedSize) # '10.0MB'

            # Current speed: this chunk's bytes over this chunk's duration.
            curDownloadTime = curTime - prevTime # 15.63818907737732
            curSpeed = curDownloadedSize / curDownloadTime # 670522.651191692
            curSpeedStr = CommonUtils.formatSize(curSpeed) # '654.8KB'

            # Average speed since the loop started (computed but not logged).
            totalDownloadTime = curTime - startTime # 15.63818907737732
            averageSpeed = totalDownloadedSize / totalDownloadTime # 670522.651191692
            averageSpeedStr = CommonUtils.formatSize(averageSpeed) # '654.8KB'

            totalDownloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(totalDownloadTime)
            totalDownloadTimeStr = CommonUtils.datetimeDictToStr(totalDownloadTimeDict, isShowMilliSecPart=False)

            logging.info("download %s speed: cur=%s/s, time: total=%s, size: %s %s%%",
                appname, curSpeedStr, totalDownloadTimeStr, downloadedSizeStr, downloadedPercent)

            prevTime = curTime

    downloadTimeSecondsFloat = CommonUtils.calcTimeEnd(downloadCalcTimeKey) # 259.60212874412537
    downloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(downloadTimeSecondsFloat) # {'days': 0, 'hours': 0, 'microseconds': 128, 'millseconds': 602, 'minutes': 4, 'seconds': 19}
    # Drop the sub-second parts so the time is rendered as HH:MM:SS.
    del downloadTimeDict["millseconds"]
    del downloadTimeDict["microseconds"]
    downloadTimeStr = CommonUtils.datetimeDictToStr(downloadTimeDict) # '00:04:19'
    if total_size:
        totalSizeStr = CommonUtils.formatSize(total_size) # '426.9MB'
        # NOTE(review): total_size covers the whole file. When a partial file
        # was resumed, this overstates the speed of the current run.
        speedFloat = total_size / downloadTimeSecondsFloat # 1724490.1117172781
        speedStr = "%s/s" % CommonUtils.formatSize(speedFloat) # '1.6MB/s'
    else:
        totalSizeStr = "Unknown"
        speedStr = "Unknown"
    logging.info("download %s end, size=%s, time=%s, speed=%s", appname, totalSizeStr, downloadTimeStr, speedStr)
输出效果:
[201127 14:26:33][DownloadApps.py 86 ] start to download gcld/攻城掠地,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201026/202010261519026im2p.apk [201127 14:26:33][DownloadApps.py 98 ] app total size: 596.4MB [201127 14:26:39][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:05, size: 10.0MB 1.68% [201127 14:26:45][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:11, size: 20.0MB 3.35% [201127 14:26:51][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:17, size: 30.0MB 5.03% [201127 14:26:56][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:00:23, size: 40.0MB 6.71% [201127 14:27:02][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.6MB/s, time: total=00:00:29, size: 50.0MB 8.38% 。。。 [201127 14:27:28][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.4MB/s, time: total=00:00:55, size: 90.0MB 15.09% [201127 14:27:34][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.6MB/s, time: total=00:01:01, size: 100.0MB 16.77% 。。。 [201127 14:28:57][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.2MB/s, time: total=00:02:23, size: 230.0MB 38.56% [201127 14:29:03][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:02:29, size: 240.0MB 40.24% [201127 14:29:08][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:02:35, size: 250.0MB 41.92% 。。。 [201127 14:32:26][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:05:53, size: 590.0MB 98.92% [201127 14:32:30][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:05:56, size: 596.4MB 100.0% [201127 14:32:30][DownloadApps.py 157] download gcld/攻城掠地 end, size=596.4MB, time=00:05:57, speed=1.7MB/s
即可。
【后记 20201214】
当支持断点续传后,计算总下载速度有误(之前是用文件总大小除以本次下载耗时,把之前已经下载好的部分也算了进去),去更新为只用本次实际下载的大小来计算:
# Bytes already on disk from a previous run; they are excluded from the
# speed figures computed further down.
hasDownloadedSize = os.path.getsize(filepath) if os.path.exists(filepath) else 0
if totalFileSize:
    hasDownloadPercent = float(hasDownloadedSize / totalFileSize) # 293601280 / 304130341 = 0.9653797744566367
else:
    hasDownloadPercent = 0.0
hasDownloadPercent100 = hasDownloadPercent * 100
logging.info("Has downloaded %.2f%%", hasDownloadPercent100)

curDownloadedSize = 0  # bytes received during this run only

headers = {
    'Range': 'bytes=%d-' % hasDownloadedSize,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}
# ChunkSize = 1024*1024*10 # 10MB
ChunkSize = 1024*1024*20 # 20MB
r = requests.get(url, stream=True, headers=headers)
if r.status_code == 416:
    # Range Not Satisfiable: the requested start is at or past the end of the
    # resource, so there is nothing left to fetch. The response body is an
    # error payload and must not be appended to the file.
    logging.info("No remaining data to download for %s", appname)
    r.close()
    chunkIterator = iter(())
    fileMode = "ab"
else:
    # Any other 4xx/5xx raises here, so an error page never ends up in the file.
    r.raise_for_status()
    if r.status_code == 206:
        # Partial Content: the server honoured the Range header, append the tail.
        fileMode = "ab"
    else:
        # The server ignored the Range header and is sending the whole file
        # again: overwrite instead of appending onto the partial file.
        fileMode = "wb"
        hasDownloadedSize = 0
    chunkIterator = r.iter_content(chunk_size=ChunkSize)

with open(filepath, fileMode) as f:
    startTime = time.time()
    prevTime = startTime
    for chunkBytes in chunkIterator:
        if not chunkBytes:
            continue

        curChunkSize = len(chunkBytes) # 10485760
        curTime = time.time() # 1606456020.0718982
        curDownloadedSize += curChunkSize # 10485760
        totalDownloadedSize = curDownloadedSize + hasDownloadedSize
        f.write(chunkBytes)
        f.flush()

        if totalFileSize:
            downloadedPercent = round(100 * totalDownloadedSize / totalFileSize, 2) # 0.7
        else:
            downloadedPercent = "Unknown"
        downloadedSizeStr = CommonUtils.formatSize(totalDownloadedSize) # '10.0MB'

        # Current speed: this chunk's bytes over this chunk's duration.
        curChunkTime = curTime - prevTime # 15.63818907737732
        curSpeed = curChunkSize / curChunkTime # 670522.651191692
        curSpeedStr = CommonUtils.formatSize(curSpeed) # '654.8KB'

        # Average speed of this run: bytes received in this run over the time
        # since the loop started (computed but not logged).
        curDownloadTime = curTime - startTime # 15.63818907737732
        averageSpeed = curDownloadedSize / curDownloadTime # 670522.651191692
        averageSpeedStr = CommonUtils.formatSize(averageSpeed) # '654.8KB'

        curDownloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(curDownloadTime)
        curDownloadTimeStr = CommonUtils.datetimeDictToStr(curDownloadTimeDict, isShowMilliSecPart=False)

        logging.info("downloading %s: %s/s, %s, %s, %s%%",
            appname, curSpeedStr, curDownloadTimeStr, downloadedSizeStr, downloadedPercent)

        prevTime = curTime

downloadTimeSecondsFloat = CommonUtils.calcTimeEnd(downloadCalcTimeKey) # 259.60212874412537
downloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(downloadTimeSecondsFloat) # {'days': 0, 'hours': 0, 'microseconds': 128, 'millseconds': 602, 'minutes': 4, 'seconds': 19}
# Drop the sub-second parts so the time is rendered as HH:MM:SS.
del downloadTimeDict["millseconds"]
del downloadTimeDict["microseconds"]
downloadTimeStr = CommonUtils.datetimeDictToStr(downloadTimeDict) # '00:04:19'

# Overall speed uses only the bytes fetched in this run, not the full file size.
speedFloat = curDownloadedSize / downloadTimeSecondsFloat # 1724490.1117172781
speedStr = "%s/s" % CommonUtils.formatSize(speedFloat) # '1.6MB/s'
logging.info("End download %s, size=%s, time=%s, speed=%s", appname, totalSizeStr, downloadTimeStr, speedStr)
待后续确认。