Python 实现中文搜索免费图库,自动分类下载到对应目录

代码:

import urllib.request
from httpx import request
import requests
import os
import requests
import json
import re
import time
import pandas as pd
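# The search term is written in Chinese; it is translated to English with
# Google Translate before being used to query the image library.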

def googleTranslate(text, timeout=10):
    """Translate *text* with the Google Translate web ``batchexecute`` endpoint.

    The request payload passes ``"auto"`` and ``"en"`` alongside the text
    (presumably source-language auto-detection and English as the target).

    Args:
        text: The string to translate.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The concatenated translated segments, or *text* unchanged when the
        response carries no translation payload or cannot be located in the
        response body.

    Raises:
        requests.RequestException: If the HTTP request itself fails or
            times out.
    """
    url = 'https://translate.google.cn/_/TranslateWebserverUi/data/batchexecute?rpcids=MkEWBc&f.sid=-2984828793698248690&bl=boq_translate-webserver_20201221.17_p0&hl=zh-CN&soc-app=1&soc-platform=1&soc-device=1&_reqid=5445720&rt=c'
    headers = {
        'origin': 'https://translate.google.cn',
        'referer': 'https://translate.google.cn/',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
        'x-client-data': 'CIW2yQEIpbbJAQjEtskBCKmdygEIrMfKAQj2x8oBCPfHygEItMvKAQihz8oBCNzVygEIi5nLAQjBnMsB',
        'Decoded': 'message ClientVariations {repeated int32 variation_id = [3300101, 3300133, 3300164, 3313321, 3318700, 3318774, 3318775, 3319220, 3319713, 3320540, 3329163, 3329601];}',
        'x-same-domain': '1'
    }

    # The payload is a JSON document that embeds another JSON document as a
    # string. Serialising both layers with json.dumps escapes quotes and
    # backslashes in *text* correctly; for plain text the result is identical
    # to the hand-written template this replaces.
    compact = {'ensure_ascii': False, 'separators': (',', ':')}
    inner_request = json.dumps([[text, "auto", "en", True], [None]], **compact)
    outer_request = json.dumps(
        [[["MkEWBc", inner_request, None, "generic"]]], **compact
    )
    data = {'f.req': outer_request}

    res = requests.post(url, headers=headers, data=data, timeout=timeout).text

    # Skip the ")]}'" prefix and the 3-4 digit number that follows it, then
    # capture everything after the first opening bracket on that line.
    pattern = r"\)\]\}'\s*\d{3,4}\s*\[(.*)\s*"
    part1 = re.findall(pattern, res)
    if not part1:
        # Unexpected response shape: fall back to the untranslated text, the
        # same way the "no payload" case below does.
        print(text)
        return text

    # The capture group dropped the leading "[", so put it back before parsing.
    part1_list = json.loads('[' + part1[0])[0]
    if part1_list[2] is None:
        print(text)
        return text

    # Element 2 is itself a JSON-encoded string holding the translation data.
    content1 = part1_list[2].replace('\n', '')
    part2_list = json.loads(content1)[1][0][0][5]

    # Each entry's first element is one translated segment.
    s = ''.join(segment[0] for segment in part2_list)
    print(s)
    return s
  
  

def download_image_improved(search_key, url, timeout=30):
    """Download the image at *url* into ``images/<search_key>/`` under the CWD.

    The file is named after the last path segment of *url* (any query string
    is ignored). The target directory, including the parent ``images``
    directory, is created when missing.

    Args:
        search_key: Name of the sub-directory (the search term) to save into.
        url: Direct URL of the image to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        ValueError: If no file name can be derived from *url*.
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the directory or file cannot be created or written.
    """
    from contextlib import closing
    from urllib.parse import urlsplit

    # Send a browser-like User-Agent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"}

    # Use the last path segment as the file name instead of a fixed index,
    # so URLs with a different depth or a query string still work.
    img_name = os.path.basename(urlsplit(url).path)
    if not img_name:
        raise ValueError("cannot derive a file name from URL: " + url)

    # os.path.join keeps the path portable; makedirs also creates the parent
    # "images" directory and tolerates an already existing target.
    save_file_path = os.path.join(os.getcwd(), 'images', search_key)
    os.makedirs(save_file_path, exist_ok=True)
    save_file_name = os.path.join(save_file_path, img_name)

    # A single streamed request, closed automatically when the block exits.
    with closing(requests.get(url, headers=headers, stream=True,
                              timeout=timeout)) as response:
        # Fail before opening the file so an error page is never saved as an
        # image and no empty file is left behind.
        response.raise_for_status()
        with open(save_file_name, 'wb') as fd:
            for chunk in response.iter_content(8192):
                fd.write(chunk)
    print("下载图片到这里了:" + save_file_name)
      
from pexels_api import API

# Pexels API client; 'xxx' is a placeholder that must be replaced with a key.
PEXELS_API_KEY = 'xxx'
api = API(PEXELS_API_KEY)

# The search term doubles as the name of the download sub-directory.
search_key = '白芸豆'

# Translate the term first, then request the first page of 20 results.
translated_query = googleTranslate(search_key)
api.search(translated_query, page=1, results_per_page=20)

photos = api.get_entries()
for photo in photos:
    # Report each photo's original-size URL, then download it.
    print('Photo original size: ', photo.original)
    download_image_improved(search_key, photo.original)

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注