文章目录

python spider

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import urllib2
import re
import os

# Download every <img class="pic"> image from index pages 1..10 of
# www.ssyer.com into the current directory as 0.jpg, 1.jpg, ... and record
# the visited page numbers in log.txt.


def _fetch(url):
    """Return the raw response body of `url`, always closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # urllib2 responses are not context managers, so close explicitly.
        response.close()


# Compiled once, outside the page loop; captures the src attribute of each
# <img class="pic" ... /> tag.
reg = re.compile(r'<img class="pic" src="(.*?)" />')

# Running counter across all pages; it names the output files.
index = 0

# `with` guarantees log.txt is flushed and closed even if a download fails.
with open('log.txt', 'w') as log:
    log.write('****************get pic*************\n\n')

    for page in range(1, 11):
        data = _fetch('http://www.ssyer.com/index_page_' + str(page) + '.html')
        log.write('**********************' + str(page) + '\n\n')

        for item in reg.findall(data):
            # Parenthesised form prints the same text under the Python 2
            # print statement this script relies on.
            print(str(item))
            # The captured src is appended to the site root to build the URL.
            image = _fetch('http://www.ssyer.com/' + item)
            # The original called `file.flush` without parentheses (a no-op)
            # and never closed the handle; `with` flushes and closes it.
            with open(str(index) + '.jpg', 'wb') as image_file:
                image_file.write(image)
            index += 1
文章目录
Fork me on GitHub