# Instagram has no built-in download feature, so I wrote this rough local script.
# -*- coding: gbk -*-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from flask_cors import CORS
import requests
import os
import re
import time
import configparser
from flask import Flask, request, render_template
# Load runtime settings from config.ini, resolved relative to the current
# working directory. ConfigParser.read() silently skips a missing file, so a
# missing config surfaces as a KeyError on the lookup below.
config = configparser.ConfigParser()
config.read('config.ini')
# Read configuration: the ProxyPort option of the [Server] section.
# NOTE(review): duankou is not referenced anywhere else in the visible code.
duankou = config['Server']['ProxyPort']
# Flask application object, wrapped with flask_cors.CORS.
app = Flask(__name__)
CORS(app)
# Define the path variable: the working directory at import time, used as the
# base directory under which downloaded photos are stored.
path = os.getcwd()
# Host suffixes the proxy may fetch from: Instagram itself and the CDNs it
# serves media from. Anything else is rejected so the endpoint cannot be used
# as an open proxy into arbitrary (including internal) hosts.
_PROXY_ALLOWED_HOST_SUFFIXES = ('instagram.com', 'cdninstagram.com', 'fbcdn.net')

# Upstream headers that must not be copied onto the proxied response.
# requests hands back an already-decoded body, so the upstream framing and
# encoding headers no longer describe the bytes we send; the rest are
# hop-by-hop headers that only apply to the upstream connection.
_PROXY_DROPPED_HEADERS = frozenset({
    'connection',
    'content-encoding',
    'content-length',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailer',
    'transfer-encoding',
    'upgrade',
})


@app.route('/mp4/ins/instagram', methods=['GET'])
def proxy_instagram():
    """Fetch an Instagram resource server-side and relay it to the client.

    Query parameters:
        url: absolute http(s) URL on Instagram or one of its CDN hosts.

    Returns:
        A Flask ``(body, status, headers)`` tuple mirroring the upstream
        response, or a plain-text error with status 400 (missing or
        disallowed URL) or 502 (upstream request failed).
    """
    # Local import keeps this block self-contained; urllib is stdlib.
    from urllib.parse import urlparse

    instagram_url = request.args.get('url')
    if not instagram_url:
        return 'Missing "url" parameter', 400

    try:
        parsed = urlparse(instagram_url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. broken IPv6 literals).
        return 'Invalid "url" parameter', 400

    host_allowed = any(
        host == suffix or host.endswith('.' + suffix)
        for suffix in _PROXY_ALLOWED_HOST_SUFFIXES
    )
    if parsed.scheme not in ('http', 'https') or not host_allowed:
        return 'Invalid "url" parameter', 400

    # Send the request with a timeout so a stalled upstream cannot pin a
    # worker forever.
    # NOTE(review): redirects are still followed, so an allowed host could
    # redirect elsewhere; pass allow_redirects=False if that matters.
    try:
        response = requests.get(instagram_url, timeout=30)
    except requests.RequestException:
        return 'Upstream request failed', 502

    # Relay the upstream response, minus headers that would misdescribe the
    # decoded body.
    forwarded_headers = [
        (name, value)
        for name, value in response.headers.items()
        if name.lower() not in _PROXY_DROPPED_HEADERS
    ]
    return response.content, response.status_code, forwarded_headers
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/process', methods=['POST'])
def process():
    """Run the Instagram scraper for the profile link posted by the client.

    Expects a JSON object body with:
        link: profile URL to scrape (required).
        proxyPort: optional value passed through to the scraper.

    Returns:
        A completion message once scraping has finished, or a plain-text
        error with status 400 when the body is not a JSON object or ``link``
        is missing. The scrape runs synchronously inside the request.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so a bad request becomes a clean 400 rather than an
    # AttributeError on data.get below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return 'Request body must be a JSON object', 400

    link = data.get('link')
    proxy_port = data.get('proxyPort')
    if not link:
        return 'Missing "link" parameter', 400

    # Pass link and proxy_port through to the scraper.
    run_instagram_scraper(link, proxy_port)
    return '采集完成'
def run_instagram_scraper(link, proxy_port):
    """Log in to Instagram and download every timeline photo of a profile.

    The profile name is taken from the first path segment of ``link``.
    Credentials are read from ``password.txt`` in the working directory,
    formatted as ``username-password``. Photos are written as ``1.jpg``,
    ``2.jpg``, ... under ``<path>/<profile>/照片``.

    Args:
        link: Instagram profile URL, e.g. ``https://www.instagram.com/name/``.
        proxy_port: accepted for interface compatibility; currently unused.

    Raises:
        ValueError: if no profile name can be extracted from ``link`` or
            ``password.txt`` lacks the ``-`` separator.
        selenium.common.exceptions.TimeoutException: if the login form does
            not appear within 10 seconds.
        requests.RequestException: on network failure or timeout.
    """
    # Kept from the original: the profile name is published as a module-level
    # global, which code outside this function may read.
    global keyword

    request_timeout = 30  # seconds; stops a stalled connection hanging the scrape

    def create_driver():
        """Return a headless Chrome WebDriver."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        return webdriver.Chrome(options=chrome_options)

    # Take only the first path segment as the profile name. The previous
    # greedy pattern swallowed extra segments ("user/reels/" -> "userreels")
    # and failed on links without a trailing slash.
    match = re.search(r'https?://(?:www\.)?instagram\.com/([^/?#\s]+)', link)
    if match is None:
        raise ValueError('Not an Instagram profile link: {!r}'.format(link))
    username = re.sub(r'[\/:*?"<>|]', '', match.group(1))
    # Reject empty or dot-only names so the download directory cannot resolve
    # to "." or ".." relative to the base path.
    if not username.strip('.'):
        raise ValueError('Not an Instagram profile link: {!r}'.format(link))
    keyword = username

    with open('password.txt', 'r', encoding='utf-8') as fi:
        txt = fi.read()
    # Drop the trailing newline and split on the first '-' only, so passwords
    # containing '-' survive intact.
    zh, mm = txt.rstrip('\r\n').split('-', 1)

    # Create the per-profile photo directory (and its parents) in one call.
    photo_dir = os.path.join(path, keyword, '照片')
    os.makedirs(photo_dir, exist_ok=True)

    # Log in through a real browser to obtain session cookies. The finally
    # clause guarantees the browser process is closed even when login fails.
    driver = create_driver()
    try:
        driver.get('https://www.instagram.com/')
        wait = WebDriverWait(driver, 10)
        # Wait for each input to be present before typing into it.
        username_input = wait.until(
            EC.presence_of_element_located((By.XPATH, '//input[@name="username"]'))
        )
        username_input.send_keys(zh)
        time.sleep(1)
        password_input = wait.until(
            EC.presence_of_element_located((By.XPATH, '//input[@name="password"]'))
        )
        password_input.send_keys(mm)
        time.sleep(1)
        ActionChains(driver).send_keys(Keys.ENTER).perform()
        # Give the login request time to complete before reading cookies.
        time.sleep(3)
        cookies = {cc["name"]: cc["value"] for cc in driver.get_cookies()}
    finally:
        driver.quit()  # close the login browser
    print('扫描完成,开始采集')

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        'x-ig-app-id': '936619743392459'
    }

    # Resolve the profile name to the numeric user id needed by the GraphQL query.
    url_ = 'https://i.instagram.com/api/v1/users/web_profile_info/?username={}'.format(keyword)
    r1 = requests.get(url_, headers=headers, cookies=cookies, timeout=request_timeout)
    user_id = r1.json()['data']['user']['id']
    print(user_id)

    url = 'https://www.instagram.com/graphql/query/'
    cursor = ""
    num = 0
    # Page through the timeline 12 posts at a time until no next page remains.
    while True:
        params = {
            'query_hash': '69cba40317214236af40e7efa697781d',
            'variables': '{{"id":{},"first":12,"after":"{}"}}'.format(user_id, cursor),
        }
        r = requests.get(
            url, headers=headers, params=params, cookies=cookies,
            timeout=request_timeout,
        )
        # Parse the response body once and reuse it for edges and paging info.
        timeline = r.json()['data']['user']['edge_owner_to_timeline_media']
        page_info = timeline['page_info']
        for edge in timeline['edges']:
            num += 1
            print('正在采集第{}个'.format(num))
            img_link = edge['node']['display_url']
            rr = requests.get(img_link, headers=headers, timeout=request_timeout)
            with open(os.path.join(photo_dir, '{}.jpg'.format(num)), 'wb') as file:
                file.write(rr.content)
        if not page_info['has_next_page']:
            break
        cursor = page_info['end_cursor']
        # Brief pause between pages.
        time.sleep(0.5)
# Start the Flask server with its default settings only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    app.run()