"""Download the pictures linked from the index pages of www.ssyer.com.

Index pages 1 through LAST_PAGE are fetched in order. Every
``<img class="pic" ...>`` source found on a page is downloaded and stored in
the current directory as ``0.jpg``, ``1.jpg``, ... using one counter that
keeps running across pages. A marker line per page is written to ``log.txt``.

This is Python 2 code (it relies on ``urllib2``).
"""
import urllib2
import re
import os
from contextlib import closing

BASE_URL = 'http://www.ssyer.com/'
LAST_PAGE = 10

# Compiled once here instead of being rebuilt for every index page.
IMG_PATTERN = re.compile(r'<img class="pic" src="(.*?)" />')


def fetch(url):
    """Return the raw response body of *url*.

    The response is always closed, even when reading fails. Any error raised
    by ``urllib2.urlopen`` or ``read`` propagates to the caller.
    """
    # urllib2 responses are not context managers on Python 2, hence closing().
    with closing(urllib2.urlopen(url)) as response:
        return response.read()


def main():
    """Walk the index pages and save every matched picture to disk."""
    index = 0  # running file-name counter shared by all pages
    with open('log.txt', 'w') as log:
        log.write('****************get pic*************\n\n')
        for page in range(1, LAST_PAGE + 1):
            data = fetch(BASE_URL + 'index_page_' + str(page) + '.html')
            log.write('**********************' + str(page) + '\n\n')
            for item in IMG_PATTERN.findall(data):
                # Parenthesised so the line parses as a print statement on
                # Python 2 and as a function call on Python 3; same output.
                print(str(item))
                image = fetch(BASE_URL + item)
                # `with` closes (and therefore flushes) each image file; the
                # original wrote `file.flush` without calling it and never
                # closed the handle.
                with open(str(index) + '.jpg', 'wb') as out:
                    out.write(image)
                index += 1


if __name__ == '__main__':
    main()