写了好久的爬虫,就想爬个大的网站学习一下,于是用淘宝练了一下手。
账号密码、滑动验证都解决了,但是卡在了人机验证。代码贴出来,
大家指点一下,看一下有啥可以改进的。
from playwright.sync_api import sync_playwright
from playwright.async_api import async_playwright
import time,csv,random
def taobao(url, username, password, js):
    """Open the landing page, follow it to the login tab and try to log in.

    The flow is: open ``url`` in a visible Chromium window, click the
    ``.bottom-bar-tmpl-right-receiveBtn`` button (which opens a new tab),
    fill in the account and password there, drag the slider captcha when one
    is shown and, if the slider was accepted, submit the login form.

    Args:
        url: Address of the page that is opened first.
        username: Text typed into ``#fm-login-id``.
        password: Text typed into ``#fm-login-password``.
        js: Source text of the init script registered on both pages
            (``main`` passes the contents of ``stealth.min.js``).

    Returns:
        None. The function ends in ``pause()`` on the login tab so the
        result can be inspected by hand.
    """
    with sync_playwright() as playwright:
        # headless=False keeps the browser window visible.
        browser = playwright.chromium.launch(headless=False)
        context = browser.new_context()
        page = context.new_page()
        page.add_init_script(js)
        page.goto(url)

        # The click opens the login page in a new tab; capture that tab.
        with context.expect_page() as new_page_info:
            page.locator(".bottom-bar-tmpl-right-receiveBtn").click()
        new_page = new_page_info.value

        # Use the script the caller supplied instead of re-reading a
        # hard-coded file, so the ``js`` argument is actually honoured.
        # NOTE(review): init scripts run on navigation, so this presumably
        # only takes effect on the tab's next navigation - confirm.
        new_page.add_init_script(js)

        # Enter the account name and password.
        new_page.fill('#fm-login-id', username)
        new_page.fill('#fm-login-password', password)

        # Give the page time to render the captcha. wait_for_timeout is
        # used instead of time.sleep so Playwright keeps processing events.
        new_page.wait_for_timeout(2000)

        # A Locator object is always truthy, so ask the page whether the
        # slider container is really displayed.
        if new_page.locator("#nocaptcha").is_visible():
            print('当前页面出现滑块')
            # mouse_slide returns the same page object it was given, so the
            # second element is not needed here.
            flag, _ = mouse_slide(page=new_page, js=js)
            if flag:
                # Enter submits the form on most page variants.
                new_page.keyboard.press('Enter')
                print("print enter", flag)
                # Fallback: click the login button through JavaScript in
                # case the Enter key did not submit the form.
                new_page.evaluate('''document.getElementById("J_SubmitStatic").click()''')
                new_page.wait_for_timeout(2000)

        # Debugging breakpoint on the login tab.
        new_page.pause()
def mouse_slide(page=None, js=None):
    """Drag the slider captcha to the right and report whether it passed.

    The mouse is pressed at the left edge of ``#baxia-dialog-content``
    (vertically centred), moved 400 px to the right in 100 steps and
    released. Afterwards the text of ``.nc-lang-cnt`` is read and compared
    with the success message.

    Args:
        page: Playwright page that shows the slider.
        js: Optional init script source to register on ``page``; skipped
            when ``None``.

    Returns:
        ``(1, page)`` when the status text equals ``'验证通过'``, otherwise
        ``(None, page)``. Any error during the drag or while reading the
        status is printed and also yields ``(None, page)``.
    """
    try:
        # Let the slider finish rendering; wait_for_timeout keeps the
        # Playwright event loop running, unlike time.sleep.
        page.wait_for_timeout(2000)
        if js is not None:
            page.add_init_script(js)

        box = page.locator('#baxia-dialog-content').bounding_box()
        if box is None:
            # bounding_box() gives None when the element is not visible.
            print('#baxia-dialog-content has no bounding box', ':验证失败')
            return None, page

        middle_y = box['y'] + box['height'] / 2
        page.mouse.move(x=box['x'], y=middle_y)
        page.mouse.down()
        # Many small steps instead of a single jump.
        page.mouse.move(box['x'] + 400, middle_y, steps=100)
        page.mouse.up()

        # Playwright counterpart of pyppeteer's Page.Jeval.
        # NOTE(review): assumes .nc-lang-cnt lives in the main frame and is
        # already updated right after the mouse is released - confirm.
        status = page.eval_on_selector('.nc-lang-cnt', 'node => node.textContent')
    except Exception as e:
        print(e, ':验证失败')
        return None, page

    print(status)
    # Check whether the captcha was accepted.
    if status != '验证通过':
        return None, page
    print('验证通过')
    return 1, page
def main():
    """Example run: load the stealth script and start the login flow."""
    # Placeholder credentials; replace them before running.
    username = '淘宝账户'
    password = '淘宝密码'
    url = 'https://s.click.taobao.com/WPDvJdt'

    # The script text is handed to taobao(), which registers it as an init
    # script on the pages it opens.
    with open('stealth.min.js', 'r') as stealth_file:
        js = stealth_file.read()

    taobao(url, username, password, js)
# Run the example only when the file is executed as a script. The comparison
# must be ``==``: ``in`` is a substring test, so importing this file under a
# name such as ``main`` would also have started the browser.
if __name__ == "__main__":
    main()
本文为冯奎原创文章,转载无需和我联系,但请注明来自冯奎博客fengkui.net
最新评论