Downloading files with Python

Requirement: a local virtual machine needs several Docker images, but they cannot be pulled there with docker pull. The images are already available on an intranet server, so a simple web service is started on that server with:

python -m SimpleHTTPServer  # start the web service
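
If the server runs Python 3 instead, the equivalent built-in module is http.server (its default port, 8000, matches the URLs used below):

python3 -m http.server 8000  # Python 3 equivalent of SimpleHTTPServer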

The files are then downloaded with the script below; it avoids the usual problem with large downloads by streaming each file in chunks (a minimal single-file sketch of the same technique follows the script).

#-*- coding:utf-8 -*-

import requests
from bs4 import BeautifulSoup

# directory listing served by SimpleHTTPServer on the intranet server
url = "http://10.10.7.32:8000/all"
base_url = "http://10.10.7.32:8000/all/"

crawl = requests.get(url)
soup = BeautifulSoup(crawl.text, "html.parser")

# collect the download URL of every file linked in the listing page
links = soup.find_all('a')
new_links = []
for link in links:
    print link.string
    new_links.append(base_url + link.string)
print new_links

for link in new_links:
    crawl = requests.get(link, stream=True)   # stream=True: needed for large files
    try:
        # take the last 16 characters of the URL as the local file name
        with open(link[-16:], 'wb') as fi:
            # write the response chunk by chunk instead of reading it all at once
            for chunk in crawl.iter_content(chunk_size=1024):
                if chunk:
                    fi.write(chunk)
                    fi.flush()
    except Exception:
        print link + " wrong"
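
The part that matters for large files is stream=True together with iter_content(): the response body is never read into memory in one piece, but fetched and written to disk chunk by chunk. A minimal single-file sketch of the same technique (the URL and output file name here are placeholders, not part of the original setup):

import requests

def download(url, out_path, chunk_size=1024):
    # stream=True defers fetching the body until it is iterated,
    # so only one chunk is held in memory at a time
    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    with open(out_path, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=chunk_size):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)

# example: fetch one file from the listing used above (file name is made up)
download("http://10.10.7.32:8000/all/example.tar", "example.tar")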