A little script I wrote for fun: a downloader for Baidu image search.
import re
import os
import requests
import hashlib


def downloadPic(html, keyword):
    # Baidu's flip-search pages embed each original image address in an
    # "objURL" field; pull them all out with a non-greedy regex.
    pic_url = re.findall('"objURL":"(.*?)",', html, re.S)

    if len(pic_url) < 1:
        return 1

    i = 0
    for each in pic_url:
        print(i + 1, end=',')
        md5Str = hashlib.md5(each.encode("utf-8")).hexdigest()

        # Record the link as one "<md5> <keyword> <url>" line.
        oneStr = md5Str + ' ' + keyword + ' ' + each + '\n'
        with open('downText.txt', 'a+') as f:
            f.write(oneStr)

        # Download the image (left disabled; uncomment to save files locally):
        # try:
        #     pic = requests.get(each, timeout=10)
        # except requests.exceptions.ConnectionError:
        #     print('connection timed out, skipping this image')
        #     continue
        #
        # kz = os.path.splitext(each)[-1]  # keep the original extension
        # photo = keyword + '_' + str(i) + kz
        #
        # with open(photo, 'wb') as f:
        #     f.write(pic.content)
        i += 1

    print('\n')
    return 0
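The download step is left commented out in the original; if you enable it, note that a real objURL can time out, return a 404, or redirect, so catching only ConnectionError is too narrow. Below is a minimal sketch of a safer save helper; the name save_image and its exact behavior are my own assumptions, not part of the original script.

def save_image(url, filename):
    # Hedged sketch: catch every requests error, not just ConnectionError,
    # and check the HTTP status before writing anything to disk.
    try:
        pic = requests.get(url, timeout=10)
        pic.raise_for_status()
    except requests.exceptions.RequestException:
        print('request failed, skipping this image')
        return False
    with open(filename, 'wb') as f:
        f.write(pic.content)
    return True

The entry point of the script follows: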
if __name__ == '__main__':

    word = input('enter a key word:')
    page = input('enter the page:')

    page = int(page)
    page = 1 if page < 1 else page

    # Base search URL; the page offset (pn) is appended per request below,
    # instead of accumulating onto the same string on every iteration.
    base_url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=' + word + '&ct=201965323&v=flip'

    p = 1
    while p <= page:
        print(word + ', page [' + str(p) + ']:')

        pn = (p - 1) * 20  # each flip page holds 20 results
        url = base_url + '&pn=' + str(pn)
        result = requests.get(url).content.decode('utf-8')

        code = downloadPic(result, word)

        if code:
            print('no matching results, exiting early')
            break
        p = p + 1

    print('done')
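One design note to close with: concatenating the query string by hand means the keyword is never URL-encoded, and Baidu sometimes serves empty pages to clients without a browser-like User-Agent. Below is a minimal sketch that lets requests handle the encoding; the fetch_page name and the User-Agent value are illustrative assumptions, not part of the original script.

def fetch_page(word, pn):
    # Sketch: pass query parameters as a dict so requests URL-encodes the
    # keyword; send an assumed browser-like User-Agent header.
    params = {
        'tn': 'baiduimage',
        'ie': 'utf-8',
        'word': word,
        'ct': '201965323',
        'v': 'flip',
        'pn': str(pn),
    }
    headers = {'User-Agent': 'Mozilla/5.0'}
    resp = requests.get('http://image.baidu.com/search/flip',
                        params=params, headers=headers, timeout=10)
    resp.raise_for_status()
    return resp.text

With this helper, the main loop could call result = fetch_page(word, (p - 1) * 20) instead of building the URL by string concatenation.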