# coding=utf-8
#code by xi4okv QQ:48011203 site:xiaokui.cc
import urllib2 as url
import urllib2
import string
import urllib
import re
import sys
def help():
    """Print the command-line usage line for this script."""
    # A parenthesised single string prints identically under the print
    # statement and the print function.
    print("python baidu.py keyword page")
def baidu_search(keyword, pn):
    """Fetch one Baidu search results page and return its raw body.

    keyword -- search phrase; it is URL-encoded here, so pass it unescaped.
    pn      -- value sent as the ``pn`` query parameter.

    Returns whatever ``read()`` yields for the response.  Errors raised by
    ``urlopen`` or ``read`` propagate to the caller.
    """
    # urlencode escapes spaces, '&', '#', non-ASCII bytes and so on; plain
    # concatenation produced a malformed or truncated query for such keywords.
    # A list of pairs keeps the original parameter order (wd, then pn).
    query = urllib.urlencode([('wd', keyword), ('pn', pn)])
    res = url.urlopen("https://www.baidu.com/s?" + query)
    try:
        return res.read()
    finally:
        # Release the connection even when read() fails.
        res.close()
def get_url(html):
    """Extract candidate result links from a search results page.

    html -- page source to scan.

    Returns a list holding every string captured by the pattern
    ``url":"(.*?)"}``, in the order they appear.  When html is empty or
    None, prints "ERROR!" and returns an empty list, so callers can always
    iterate over the result.
    """
    if not html:
        print("ERROR!")
        # Previously this path fell off the end and returned None, which made
        # the caller's ``for`` loop raise TypeError.
        return []
    return re.findall(r'url":"(.*?)"}', html)
def baidu_url(xk_url):
    """Open a link taken from the results page and return the URL reached.

    xk_url -- link text with no scheme; "http:" is prepended before opening
              (presumably a scheme-relative "//host/..." link; verify
              against the page format).

    Returns the URL reported by the response (after any redirects that
    urlopen followed), or None after printing "ERROR!" when the request
    cannot be made.
    """
    try:
        response = urllib2.urlopen("http:" + xk_url)
    except Exception:
        # Best-effort lookup: any request failure is reported and skipped.
        # Unlike a bare ``except:``, Ctrl-C and SystemExit still propagate.
        print("ERROR!")
        return None
    try:
        return response.geturl()
    finally:
        # Only the final URL is needed; close the connection right away.
        response.close()
def main():
    """Search Baidu for a keyword and save the resolved result URLs.

    Reads ``keyword`` from sys.argv[1] and the number of result pages from
    sys.argv[2].  Each resolved URL is printed and written, one per line, to
    ``result.lst`` in the current directory.  The usage line is always
    printed first; if an argument is missing or the page count is not an
    integer, the script exits with status 1 without touching ``result.lst``.
    """
    help()
    if len(sys.argv) < 3:
        sys.exit(1)
    keyword = sys.argv[1]
    try:
        page = int(sys.argv[2])
    except ValueError:
        sys.exit(1)

    print('search ' + keyword + ' in baidu:')
    # Open the output only after the arguments are known to be valid, and let
    # the with-block close it on every exit path.
    with open('result.lst', 'w') as f:
        for page_index in range(page):
            # Offsets run 0, 10, 20, ...; the old loop bumped its counter
            # before use and so started at 20, skipping the first results.
            # NOTE(review): assumes 10 results per page and a zero-based pn.
            html = baidu_search(keyword, 10 * page_index)
            for xk_url in get_url(html) or []:
                if "link?url" not in xk_url:
                    continue
                result = baidu_url(xk_url)
                if not result:
                    # Resolution failed; report it and move on.
                    print("ERROR")
                    continue
                try:
                    f.write(result + "\n")
                except IOError:
                    print("ERROR")
                    continue
                print(result)
# Run the search only when this file is executed as a script.
if __name__ == "__main__":
    main()