折腾:
【未解决】Python给requests下载文件加速提升速度
期间,顺带参考
去加上:
在下载期间,就实时统计下载速度:从开始下载到当前已下载的最新平均速度
以及后来想到了,可以通过计算 当前chunk的大小 / 下载该chunk所用的时间,得到实时的(当前chunk的)下载速度
当前代码稍加调整后是:
src/common/DownloadApps.py
def download(self,task):
    """Download one app package to ``filepath``, appending to a partial file if present.

    The method first issues a probe request to read ``Content-Length``, then
    requests the remainder of the file with a ``Range`` header starting at the
    size of whatever is already on disk, and appends each received chunk.
    After every chunk it logs the downloaded size and percentage; afterwards
    it prepares the total size, elapsed time and average speed strings for a
    final summary log line.

    NOTE(review): the beginning of this method is elided in this listing;
    ``url``, ``filepath``, ``appname`` and ``downloadCalcTimeKey`` are
    presumably prepared there from ``task`` -- confirm against the full file.
    """
    # ... (start of the method is omitted in this listing)
    totalSizeStr = ""
    total_size = None
    try:
        # Only the headers are needed from this probe request, so release the
        # streamed connection as soon as Content-Length has been read.
        with requests.get(url, stream=True) as r1:
            total_size = int(r1.headers['Content-Length']) # 447681304
        totalSizeStr = CommonUtils.formatSize(total_size)
        logging.info("app total size: %s", totalSizeStr)
    except Exception:
        # Best effort: without a usable Content-Length the download still
        # proceeds, only the percentage / summary fall back to "unknown".
        total_size = None
    # Resume from the size of any partially downloaded file.
    temp_size = os.path.getsize(filepath) if os.path.exists(filepath) else 0
    headers = {
        'Range': 'bytes=%d-' % temp_size,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
    }
    r = requests.get(url, stream=True, headers=headers)
    # ChunkSize = 1024*1024*10 # 10MB
    ChunkSize = 1024*1024*20 # 20MB
    with open(filepath, "ab") as f:
        for chunk in r.iter_content(chunk_size=ChunkSize):
            if chunk:
                temp_size += len(chunk)
                f.write(chunk)
                f.flush()
                # `if total_size` also covers a reported length of 0, which
                # would otherwise raise ZeroDivisionError.
                ratio = round(100 * temp_size / total_size,2) if total_size else "unknown"
                tempSizeStr = CommonUtils.formatSize(temp_size)
                logging.info("download {0} {1}/{2} {3}%".format(appname, tempSizeStr, totalSizeStr, ratio))
    downloadTimeSecondsFloat = CommonUtils.calcTimeEnd(downloadCalcTimeKey) # 259.60212874412537
    downloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(downloadTimeSecondsFloat) # {'days': 0, 'hours': 0, 'microseconds': 128, 'millseconds': 602, 'minutes': 4, 'seconds': 19}
    # Drop the sub-second parts so the formatted time reads HH:MM:SS.
    del downloadTimeDict["millseconds"]
    del downloadTimeDict["microseconds"]
    downloadTimeStr = CommonUtils.datetimeDictToStr(downloadTimeDict) # '00:04:19'
    if total_size:
        totalSizeStr = CommonUtils.formatSize(total_size) # '426.9MB'
        speedFloat = total_size / downloadTimeSecondsFloat # 1724490.1117172781
        speedStr = "%s/s" % CommonUtils.formatSize(speedFloat) # '1.6MB/s'
    else:
        totalSizeStr = "Unknown"
        speedStr = "Unknown"
    # logging.info("download {} end".format(appname))
    logging.info("download %s end, size=%s, time=%s, speed=%s", appname, totalSizeStr, downloadTimeStr, speedStr)
继续再去优化,加上瞬时,实时下载速度
# Excerpt from inside download(): stream the response to disk and log, for
# every chunk, both the average speed since the loop started and the
# instantaneous speed of the chunk just received.
# NOTE(review): `r`, `filepath`, `appname`, `total_size`, `totalSizeStr` and
# the initial value of `downloadedSize` are not shown in this excerpt.
# ChunkSize = 1024*1024*10 # 10MB
ChunkSize = 1024*1024*20 # 20MB
with open(filepath, "ab") as f:
    startTime = time.time()
    prevTime = startTime
    for chunk in r.iter_content(chunk_size=ChunkSize):
        if chunk:
            curTime = time.time()
            curChunkSize = len(chunk)
            downloadedSize += curChunkSize
            f.write(chunk)
            f.flush()
            if total_size:
                downloadedPercent = round(100 * downloadedSize / total_size, 2)
            else:
                downloadedPercent = "unknown"
            downloadedSizeStr = CommonUtils.formatSize(downloadedSize)
            curChunkTime = curTime - prevTime
            # Speed is bytes per second: divide the chunk's *length* (not the
            # bytes object itself, which raises TypeError) by its duration.
            # A zero duration (coarse timer) is reported as speed 0.
            instantSpeed = curChunkSize / curChunkTime if curChunkTime > 0 else 0.0
            instantSpeedStr = CommonUtils.formatSize(instantSpeed)
            curDownloadedTime = curTime - startTime
            averageSpeed = downloadedSize / curDownloadedTime if curDownloadedTime > 0 else 0.0
            averageSpeedStr = CommonUtils.formatSize(averageSpeed)
            logging.info("download %s size: %s/%s %s%%, speed: avg=%s now=%s",
                appname, downloadedSizeStr, totalSizeStr, downloadedPercent, averageSpeedStr, instantSpeedStr)
            prevTime = curTime
抽空去调试看看
【总结】
优化后代码:
# Excerpt from inside download(): request the rest of the file starting at
# the number of bytes already on disk, append each chunk to the file, and log
# progress plus average / instantaneous speed after every chunk.
# NOTE(review): `url`, `filepath`, `appname`, `total_size`, `totalSizeStr` and
# the initial `totalDownloadedSize` come from code not shown in this excerpt.
headers = {
    'Range': 'bytes=%d-' % totalDownloadedSize,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}
r = requests.get(url, stream=True, headers=headers)
ChunkSize = 1024*1024*10 # 10MB
# ChunkSize = 1024*1024*20 # 20MB
with open(filepath, "ab") as f:
    startTime = time.time()
    prevTime = startTime
    for chunkBytes in r.iter_content(chunk_size=ChunkSize):
        if chunkBytes:
            curDownloadedSize = len(chunkBytes) # 10485760
            curTime = time.time() # 1606456020.0718982
            totalDownloadedSize += curDownloadedSize # 10485760
            f.write(chunkBytes)
            f.flush()
            # `if total_size` covers both None and a reported length of 0.
            if total_size:
                downloadedPercent = round(100 * totalDownloadedSize / total_size, 2) # 0.7
            else:
                downloadedPercent = "Unknown"
            downloadedSizeStr = CommonUtils.formatSize(totalDownloadedSize) # '10.0MB'
            curDownloadTime = curTime - prevTime # 15.63818907737732
            # Guard the divisions: two timestamps can be equal when the timer
            # resolution is coarser than the time a (small) chunk takes.
            instantSpeed = curDownloadedSize / curDownloadTime if curDownloadTime > 0 else 0.0 # 670522.651191692
            instantSpeedStr = CommonUtils.formatSize(instantSpeed) # '654.8KB'
            totalDownloadTime = curTime - startTime # 15.63818907737732
            averageSpeed = totalDownloadedSize / totalDownloadTime if totalDownloadTime > 0 else 0.0 # 670522.651191692
            averageSpeedStr = CommonUtils.formatSize(averageSpeed) # '654.8KB'
            logging.info("download %s size: %s/%s %s%%, speed: avg=%s now=%s",
                appname, downloadedSizeStr, totalSizeStr, downloadedPercent, averageSpeedStr, instantSpeedStr)
            # The next chunk's duration is measured from this timestamp.
            prevTime = curTime
。。。
多轮调试后,输出:
[201127 13:46:44][DownloadApps.py 86 ] start to download fknsg2/放开那三国2,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201027/202010271738254e7s6.apk
[201127 13:46:44][DownloadApps.py 98 ] app total size: 1.4GB
[201127 13:48:12][DownloadApps.py 136] download fknsg2/放开那三国2 size: 10.0MB/1.4GB 0.7%, speed: avg=654.8KB now=654.8KB
[201127 13:52:39][DownloadApps.py 136] download fknsg2/放开那三国2 size: 20.0MB/1.4GB 1.4%, speed: avg=190.3KB now=111.4KB
[201127 13:53:05][DownloadApps.py 136] download fknsg2/放开那三国2 size: 30.0MB/1.4GB 2.11%, speed: avg=82.5KB now=38.6KB
[201127 13:53:12][DownloadApps.py 136] download fknsg2/放开那三国2 size: 40.0MB/1.4GB 2.81%, speed: avg=105.5KB now=659.6KB
[201127 13:53:21][DownloadApps.py 136] download fknsg2/放开那三国2 size: 50.0MB/1.4GB 3.51%, speed: avg=129.0KB now=1.1MB
[201127 13:53:30][DownloadApps.py 136] download fknsg2/放开那三国2 size: 60.0MB/1.4GB 4.21%, speed: avg=151.5KB now=1.1MB
[201127 13:53:36][DownloadApps.py 136] download fknsg2/放开那三国2 size: 70.0MB/1.4GB 4.91%, speed: avg=174.0KB now=1.6MB
[201127 13:53:43][DownloadApps.py 136] download fknsg2/放开那三国2 size: 80.0MB/1.4GB 5.62%, speed: avg=195.5KB now=1.4MB
[201127 13:53:49][DownloadApps.py 136] download fknsg2/放开那三国2 size: 90.0MB/1.4GB 6.32%, speed: avg=216.7KB now=1.6MB
[201127 13:53:55][DownloadApps.py 136] download fknsg2/放开那三国2 size: 100.0MB/1.4GB 7.02%, speed: avg=237.5KB now=1.7MB
就正常了。
不过平均速度很慢。
突然发现是调试导致的。
重新运行,不加断点,试试
果然正常了:
[201127 13:56:11][DownloadApps.py 86 ] start to download fknsg2/放开那三国2,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201027/202010271738254e7s6.apk
[201127 13:56:11][DownloadApps.py 98 ] app total size: 1.4GB
[201127 13:56:18][DownloadApps.py 136] download fknsg2/放开那三国2 size: 10.0MB/1.4GB 0.7%, speed: avg=1.5MB now=1.5MB
[201127 13:56:24][DownloadApps.py 136] download fknsg2/放开那三国2 size: 20.0MB/1.4GB 1.4%, speed: avg=1.6MB now=1.7MB
[201127 13:56:30][DownloadApps.py 136] download fknsg2/放开那三国2 size: 30.0MB/1.4GB 2.11%, speed: avg=1.6MB now=1.5MB
[201127 13:56:39][DownloadApps.py 136] download fknsg2/放开那三国2 size: 40.0MB/1.4GB 2.81%, speed: avg=1.4MB now=1.2MB
[201127 13:56:46][DownloadApps.py 136] download fknsg2/放开那三国2 size: 50.0MB/1.4GB 3.51%, speed: avg=1.5MB now=1.5MB
[201127 13:56:52][DownloadApps.py 136] download fknsg2/放开那三国2 size: 60.0MB/1.4GB 4.21%, speed: avg=1.5MB now=1.6MB
[201127 13:57:00][DownloadApps.py 136] download fknsg2/放开那三国2 size: 70.0MB/1.4GB 4.91%, speed: avg=1.4MB now=1.2MB
[201127 13:57:08][DownloadApps.py 136] download fknsg2/放开那三国2 size: 80.0MB/1.4GB 5.62%, speed: avg=1.4MB now=1.2MB
速度符合预期。
为了方便查看已下载比例,调整参数位置:
logging.info("download %s speed: avg=%s now=%s, size: %s/%s %s%%",
    appname, averageSpeedStr, instantSpeedStr, downloadedSizeStr, totalSizeStr, downloadedPercent)
输出:
再去加个总时长吧
totalDownloadTime = curTime - startTime # 15.63818907737732
averageSpeed = totalDownloadedSize / totalDownloadTime # 670522.651191692
averageSpeedStr = CommonUtils.formatSize(averageSpeed) # '654.8KB'
totalDownloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(totalDownloadTime)
totalDownloadTimeStr = CommonUtils.datetimeDictToStr(totalDownloadTimeDict, isShowMilliSecPart=False)
logging.info("download %s speed: now=%s/s, time: total=%s, size: %s %s%%",
    appname, instantSpeedStr, totalDownloadTimeStr, downloadedSizeStr, downloadedPercent)
效果:
[201127 14:26:33][DownloadApps.py 86 ] start to download gcld/攻城掠地,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201026/202010261519026im2p.apk
[201127 14:26:33][DownloadApps.py 98 ] app total size: 596.4MB
[201127 14:26:39][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:05, size: 10.0MB 1.68%
[201127 14:26:45][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:11, size: 20.0MB 3.35%
[201127 14:26:51][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:17, size: 30.0MB 5.03%
即可。
【总结】
此处最后代码:
def download(self,task):
    """Download one app package to ``filepath``, appending to a partial file if present.

    A probe request reads ``Content-Length`` to learn the total size. The
    actual download then sends a ``Range`` header starting at the size of the
    file already on disk and appends every received chunk. After each chunk
    the speed of that chunk, the elapsed time, the downloaded size and the
    percentage are logged; a summary line is logged at the end.

    NOTE(review): the beginning of this method is elided in this listing;
    ``url``, ``filepath``, ``appname`` and ``downloadCalcTimeKey`` are
    presumably prepared there from ``task`` -- confirm against the full file.
    """
    # ... (start of the method is omitted in this listing)
    totalSizeStr = ""
    total_size = None
    try:
        # Only the headers are needed from this probe request, so release the
        # streamed connection as soon as Content-Length has been read.
        with requests.get(url, stream=True) as r1:
            total_size = int(r1.headers['Content-Length']) # 447681304
        totalSizeStr = CommonUtils.formatSize(total_size)
        logging.info("app total size: %s", totalSizeStr)
    except Exception:
        # Best effort: the download continues without a known total size.
        logging.warning("Fail to get total size 'Content-Length' from %s", url)
        total_size = None
    # Resume from the size of any partially downloaded file.
    totalDownloadedSize = os.path.getsize(filepath) if os.path.exists(filepath) else 0
    headers = {
        'Range': 'bytes=%d-' % totalDownloadedSize,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
    }
    r = requests.get(url, stream=True, headers=headers)
    ChunkSize = 1024*1024*10 # 10MB
    # ChunkSize = 1024*1024*20 # 20MB
    with open(filepath, "ab") as f:
        startTime = time.time()
        prevTime = startTime
        for chunkBytes in r.iter_content(chunk_size=ChunkSize):
            if chunkBytes:
                curDownloadedSize = len(chunkBytes) # 10485760
                curTime = time.time() # 1606456020.0718982
                totalDownloadedSize += curDownloadedSize # 10485760
                f.write(chunkBytes)
                f.flush()
                # `if total_size` covers both None and a reported length of 0.
                if total_size:
                    downloadedPercent = round(100 * totalDownloadedSize / total_size, 2) # 0.7
                else:
                    downloadedPercent = "Unknown"
                downloadedSizeStr = CommonUtils.formatSize(totalDownloadedSize) # '10.0MB'
                curDownloadTime = curTime - prevTime # 15.63818907737732
                # Guard the division: two timestamps can be equal when the
                # timer resolution is coarser than the chunk's transfer time.
                curSpeed = curDownloadedSize / curDownloadTime if curDownloadTime > 0 else 0.0 # 670522.651191692
                curSpeedStr = CommonUtils.formatSize(curSpeed) # '654.8KB'
                totalDownloadTime = curTime - startTime # 15.63818907737732
                totalDownloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(totalDownloadTime)
                totalDownloadTimeStr = CommonUtils.datetimeDictToStr(totalDownloadTimeDict, isShowMilliSecPart=False)
                logging.info("download %s speed: cur=%s/s, time: total=%s, size: %s %s%%",
                    appname, curSpeedStr, totalDownloadTimeStr, downloadedSizeStr, downloadedPercent)
                # The next chunk's duration is measured from this timestamp.
                prevTime = curTime
    downloadTimeSecondsFloat = CommonUtils.calcTimeEnd(downloadCalcTimeKey) # 259.60212874412537
    downloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(downloadTimeSecondsFloat) # {'days': 0, 'hours': 0, 'microseconds': 128, 'millseconds': 602, 'minutes': 4, 'seconds': 19}
    # Drop the sub-second parts so the formatted time reads HH:MM:SS.
    del downloadTimeDict["millseconds"]
    del downloadTimeDict["microseconds"]
    downloadTimeStr = CommonUtils.datetimeDictToStr(downloadTimeDict) # '00:04:19'
    if total_size:
        totalSizeStr = CommonUtils.formatSize(total_size) # '426.9MB'
        # NOTE: this divides the full file size by the elapsed time, so bytes
        # that were already on disk before this run are counted as well.
        speedFloat = total_size / downloadTimeSecondsFloat # 1724490.1117172781
        speedStr = "%s/s" % CommonUtils.formatSize(speedFloat) # '1.6MB/s'
    else:
        totalSizeStr = "Unknown"
        speedStr = "Unknown"
    # logging.info("download {} end".format(appname))
    logging.info("download %s end, size=%s, time=%s, speed=%s", appname, totalSizeStr, downloadTimeStr, speedStr)
输出效果:
[201127 14:26:33][DownloadApps.py 86 ] start to download gcld/攻城掠地,url https://gameapktxdl.vivo.com.cn/appstore/developer/soft/20201026/202010261519026im2p.apk
[201127 14:26:33][DownloadApps.py 98 ] app total size: 596.4MB
[201127 14:26:39][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:05, size: 10.0MB 1.68%
[201127 14:26:45][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:11, size: 20.0MB 3.35%
[201127 14:26:51][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:00:17, size: 30.0MB 5.03%
[201127 14:26:56][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:00:23, size: 40.0MB 6.71%
[201127 14:27:02][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.6MB/s, time: total=00:00:29, size: 50.0MB 8.38%
。。。
[201127 14:27:28][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.4MB/s, time: total=00:00:55, size: 90.0MB 15.09%
[201127 14:27:34][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.6MB/s, time: total=00:01:01, size: 100.0MB 16.77%
。。。
[201127 14:28:57][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.2MB/s, time: total=00:02:23, size: 230.0MB 38.56%
[201127 14:29:03][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.7MB/s, time: total=00:02:29, size: 240.0MB 40.24%
[201127 14:29:08][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:02:35, size: 250.0MB 41.92%
。。。
[201127 14:32:26][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:05:53, size: 590.0MB 98.92%
[201127 14:32:30][DownloadApps.py 139] download gcld/攻城掠地 speed: cur=1.8MB/s, time: total=00:05:56, size: 596.4MB 100.0%
[201127 14:32:30][DownloadApps.py 157] download gcld/攻城掠地 end, size=596.4MB, time=00:05:57, speed=1.7MB/s
即可。
【后记 20201214】
当支持断点续传后,计算总下载速度有误,去更新为:
# Excerpt from inside download(), resume-aware version: bytes that were
# already on disk before this run are tracked separately from bytes fetched
# in this run, so the final speed only reflects data actually transferred.
# NOTE(review): `url`, `filepath`, `appname`, `totalFileSize`, `totalSizeStr`
# and `downloadCalcTimeKey` come from code not shown in this excerpt.
hasDownloadedSize = os.path.getsize(filepath) if os.path.exists(filepath) else 0
if totalFileSize:
    hasDownloadPercent = hasDownloadedSize / totalFileSize # 293601280 / 304130341 = 0.9653797744566367
else:
    hasDownloadPercent = 0.0
hasDownloadPercent100 = hasDownloadPercent * 100
logging.info("Has downloaded %.2f%%", hasDownloadPercent100)
# Bytes fetched during this run only (excludes hasDownloadedSize).
curDownloadedSize = 0
headers = {
    'Range': 'bytes=%d-' % hasDownloadedSize,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36",
}
r = requests.get(url, stream=True, headers=headers)
# ChunkSize = 1024*1024*10 # 10MB
ChunkSize = 1024*1024*20 # 20MB
with open(filepath, "ab") as f:
    startTime = time.time()
    prevTime = startTime
    for chunkBytes in r.iter_content(chunk_size=ChunkSize):
        if chunkBytes:
            curChunkSize = len(chunkBytes) # 10485760
            curTime = time.time() # 1606456020.0718982
            curDownloadedSize += curChunkSize # 10485760
            totalDownloadedSize = curDownloadedSize + hasDownloadedSize
            f.write(chunkBytes)
            f.flush()
            if totalFileSize:
                downloadedPercent = round(100 * totalDownloadedSize / totalFileSize, 2) # 0.7
            else:
                downloadedPercent = "Unknown"
            downloadedSizeStr = CommonUtils.formatSize(totalDownloadedSize) # '10.0MB'
            # Duration of this chunk alone; kept under its own name so it is
            # not confused with the elapsed time of the whole run below.
            curChunkTime = curTime - prevTime # 15.63818907737732
            # Guard the division: two timestamps can be equal when the timer
            # resolution is coarser than the chunk's transfer time.
            curSpeed = curChunkSize / curChunkTime if curChunkTime > 0 else 0.0 # 670522.651191692
            curSpeedStr = CommonUtils.formatSize(curSpeed) # '654.8KB'
            # Elapsed time since this run's download loop started.
            curDownloadTime = curTime - startTime # 15.63818907737732
            curDownloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(curDownloadTime)
            curDownloadTimeStr = CommonUtils.datetimeDictToStr(curDownloadTimeDict, isShowMilliSecPart=False)
            # logging.info("download %s speed: cur=%s/s, time: total=%s, size: %s %s%%",
            logging.info("downloading %s: %s/s, %s, %s, %s%%",
                appname, curSpeedStr, curDownloadTimeStr, downloadedSizeStr, downloadedPercent)
            prevTime = curTime
downloadTimeSecondsFloat = CommonUtils.calcTimeEnd(downloadCalcTimeKey) # 259.60212874412537
downloadTimeDict = CommonUtils.floatSecondsToDatetimeDict(downloadTimeSecondsFloat) # {'days': 0, 'hours': 0, 'microseconds': 128, 'millseconds': 602, 'minutes': 4, 'seconds': 19}
# Drop the sub-second parts so the formatted time reads HH:MM:SS.
del downloadTimeDict["millseconds"]
del downloadTimeDict["microseconds"]
downloadTimeStr = CommonUtils.datetimeDictToStr(downloadTimeDict) # '00:04:19'
# Average speed of this run: only the bytes fetched now, not the resumed part.
speedFloat = curDownloadedSize / downloadTimeSecondsFloat if downloadTimeSecondsFloat > 0 else 0.0 # 1724490.1117172781
speedStr = "%s/s" % CommonUtils.formatSize(speedFloat) # '1.6MB/s'
logging.info("End download %s, size=%s, time=%s, speed=%s", appname, totalSizeStr, downloadTimeStr, speedStr)
待后续确认。