旋轉拖動驗證碼解決方案
曾幾何時,你是否被一個旋轉驗證碼而困擾,沒錯今日主題——旋轉驗證碼。
之前也是被他傷透了心,研究了好幾天的js,想直接通過接口傳輸直接解決驗證碼的,然而我失敗了,不過這一次,他來了他來了,他帶著RotNet走來了。
彩虹屁
RotNet也是我無意間發現的,沒錯,時隔了好幾個月,他自己出現在我眼前的。這是他的github:https://github.com/d4nst/RotNet/tree/master,他主要是預測圖像的旋轉角度以校正其方向,庫中內容很全,數據集的下載、訓練、預測全都有,而且最最最重要的是,大神提供了模型,我的天。。。這是什麼神仙,你是孫悟空派來拯救我的吧!兄弟啊!!!
當然有興趣的同學可以看看他的文章,有具體的思路和網絡實現。還有覺得有用的同學可以星一下他的github
好的,話不多說,先看看我最后的成果吧,
思路和修改
然后因為在跳出驗證碼的時候一般是直接給出圖片的網址,所以我修改了源文件,用來直接讀取網絡圖片和修整圖片大小來適應網絡,
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# utils.py
# Changes made on top of the upstream RotNet utils.py:
#   * RotNetDataGenerator._get_batches_of_transformed_samples now accepts
#     image URLs as well as local paths and resizes every loaded image to
#     224x224 before it is rotated.
#   * ImageScale() is a new helper that downloads and decodes an image URL.


class RotNetDataGenerator(Iterator):
    """Batch generator for RotNet.

    Only the method that was modified for the captcha use case is shown
    here; the rest of the class is unchanged from upstream.
    """

    def _get_batches_of_transformed_samples(self, index_array):
        """Build one batch of (images, rotation labels).

        Args:
            index_array: indices of the samples that make up this batch.

        Returns:
            A ``(batch_x, batch_y)`` tuple. ``batch_x`` is a float32 array of
            shape ``(len(index_array),) + self.input_shape``. ``batch_y``
            holds the rotation angles, one-hot encoded over 360 classes when
            ``self.one_hot`` is set and divided by 360 otherwise.

        Raises:
            IOError: if an image file or URL cannot be read or decoded.
        """
        # create array to hold the images
        batch_x = np.zeros((len(index_array),) + self.input_shape, dtype='float32')
        # create array to hold the labels
        batch_y = np.zeros(len(index_array), dtype='float32')

        # iterate through the current batch
        for i, j in enumerate(index_array):
            if self.filenames is None:
                image = self.images[j]
            else:
                is_color = int(self.color_mode == 'rgb')
                source = self.filenames[j]
                # Captcha images are usually handed over as URLs, so fetch
                # those over HTTP and read everything else from disk.
                if source.lower().startswith(('http://', 'https://')):
                    image = ImageScale(source, is_color)
                else:
                    image = cv2.imread(source, is_color)
                # Both loaders signal failure with None; fail with a clear
                # message instead of an AttributeError on `.shape` below.
                if image is None:
                    raise IOError('could not read image: {}'.format(source))
                # Bring every image to the 224x224 size used by the network.
                h, w = image.shape[:2]
                if h != 224 or w != 224:
                    image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_CUBIC)
                if is_color:
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            if self.rotate:
                # get a random angle
                rotation_angle = np.random.randint(360)
            else:
                rotation_angle = 0

            # generate the rotated image
            rotated_image = generate_rotated_image(
                image,
                rotation_angle,
                size=self.input_shape[:2],
                crop_center=self.crop_center,
                crop_largest_rect=self.crop_largest_rect
            )

            # add dimension to account for the channels if the image is greyscale
            if rotated_image.ndim == 2:
                rotated_image = np.expand_dims(rotated_image, axis=2)

            # store the image and label in their corresponding batches
            batch_x[i] = rotated_image
            batch_y[i] = rotation_angle

        if self.one_hot:
            # convert the numerical labels to binary labels
            batch_y = to_categorical(batch_y, 360)
        else:
            batch_y /= 360

        # preprocess input images
        if self.preprocess_func:
            batch_x = self.preprocess_func(batch_x)

        return batch_x, batch_y


def ImageScale(url, is_color=True):
    """Download the image at *url* and decode it with OpenCV.

    Args:
        url: HTTP(S) address of the image.
        is_color: decode as a 3-channel colour image when truthy (the
            default, matching the previous behaviour), as a single-channel
            greyscale image otherwise.

    Returns:
        The decoded image array, or ``None`` when the response body is empty
        or cannot be decoded as an image.
    """
    # Close the connection as soon as the body has been read.
    with request.urlopen(url) as resp:
        payload = resp.read()
    if not payload:
        return None
    buf = np.asarray(bytearray(payload), dtype="uint8")
    flag = cv2.IMREAD_COLOR if is_color else cv2.IMREAD_GRAYSCALE
    return cv2.imdecode(buf, flag)
預測角度,也是在他的源碼基礎上做修改的,需要注意的是模型位置和測試圖片的位置需要修改為你電腦上的文件位置
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from __future__ import print_function
import os import numpy as np from keras.applications.imagenet_utils import preprocess_input from keras.models import load_model from utils import RotNetDataGenerator, angle_error def process_images(input_path, batch_size=64, crop=True): #需要修改模型文件位置 model = load_model("I:\\pythonProject\\RotNet\\rotnet_models\\rotnet_street_view_resnet50_keras2.hdf5", custom_objects={'angle_error': angle_error}, compile=False) extensions = ['.jpg', '.jpeg', '.bmp', '.png'] if os.path.isfile(input_path) or input_path[:4].lower()=="http": image_paths = [input_path] else: image_paths = [os.path.join(input_path, f) for f in os.listdir(input_path) if os.path.splitext(f)[1].lower() in extensions] predictions = model.predict_generator( RotNetDataGenerator( image_paths, input_shape=(224, 224, 3), batch_size=batch_size, one_hot=True, preprocess_func=preprocess_input, rotate=False, crop_largest_rect=True, crop_center=True ), val_samples=len(image_paths) ) predicted_angles = np.argmax(predictions, axis=1) print(predicted_angles) return predicted_angles if __name__ == '__main__': #修改測試圖片位置,本地地址,或是網(wǎng)絡圖片地址 process_images("I:\\pythonProject\\RotNet\\data\\test_examples\\008999_4.jpg") |
然后通過分析百度指數的js源碼發現旋轉角度的公式是 angle=o/b*360
即o為拖動的距離,b=底軸寬-按鈕寬
所以我們需要知道的拖動的距離就是 o=angle/360*b
好的,匯總到一起,就可以了。模擬登錄百度指數,而且支持無頭模式
中間有參考一段這位老哥寫的pyppeteer的拖動,https://blog.csdn.net/qq393912540/article/details/91956136
還有這位老哥的反爬策略
https://github.com/wkunzhi/Python3-Spider/blob/master/%E3%80%90%E6%B7%98%E5%AE%9D%E3%80%91%E8%87%AA%E5%8A%A8%E7%99%BB%E9%99%86/auto_login_pyppeteer.py
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import asyncio
import random

from pyppeteer import launch

from correct_rotation_for_angle import process_images

# Text of the `.username-text` element when nobody is logged in.
# NOTE(review): the literal used by this script is the traditional form; the
# simplified form is accepted as well because the page presumably serves
# simplified Chinese -- confirm against the live site.
_LOGIN_LABELS = ("登錄", "登录")

# Scripts that hide the usual automation fingerprints from the page.
_STEALTH_SCRIPTS = (
    '''() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => false } });window.screen.width=1366; }''',
    '''() =>{ window.navigator.chrome = { runtime: {}, };}''',
    '''() =>{ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); }''',
    '''() =>{ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5,6], }); }''',
)


async def page_evaluate(page):
    """Install the anti-detection scripts on *page*.

    Each script is registered for every future document, so it is still in
    effect after a later ``page.goto``, and is also run once in the document
    that is currently loaded.
    """
    for script in _STEALTH_SCRIPTS:
        await page.evaluateOnNewDocument(script)
        await page.evaluate(script)


async def _read_property(page, selector, name):
    """Return property *name* of the first element matching *selector*.

    Raises:
        RuntimeError: if no element matches *selector*.
    """
    element = await page.querySelector(selector)
    if element is None:
        raise RuntimeError(f'element not found: {selector}')
    return await (await element.getProperty(name)).jsonValue()


async def _press_hovered(page, selector):
    """Hover *selector*, then press and release the mouse after a random pause."""
    await page.hover(selector)
    await page.mouse.down()
    await asyncio.sleep(random.random())
    await page.mouse.up()


async def _solve_rotation_captcha(page):
    """Keep dragging the rotation slider until the captcha image disappears."""
    rot_img = await page.querySelector('.vcode-spin-img')
    while rot_img:
        img_url = await (await rot_img.getProperty("src")).jsonValue()
        angle = process_images(img_url)[0]
        bottom_line = await _read_property(page, ".vcode-spin-bottom", "offsetWidth")
        button_line = await _read_property(page, ".vcode-spin-button", "offsetWidth")
        # angle = o / b * 360 with b = track width - button width,
        # hence the drag distance is o = angle / 360 * b.
        b = bottom_line - button_line
        move_line = float(angle) / 360 * b
        await try_validation(page, move_line)
        # wait 3 seconds for the page to react
        await asyncio.sleep(3)
        rot_img = await page.querySelector('.vcode-spin-img')


async def _login(page, username, password):
    """Fill in the login form, submit it and deal with the follow-up dialogs."""
    await page.click(".username-text")
    await asyncio.sleep(1.6)
    # fill in the user name
    await page.type('.pass-text-input-userName', username)
    # focus the password field with a hand-made click, then type the password
    await page.hover(".pass-text-input-password")
    await asyncio.sleep(0.5)
    await page.mouse.down()
    await asyncio.sleep(random.random())
    await page.mouse.up()
    await page.type('.pass-text-input-password', password)
    # wander off a little before clicking the submit button
    await page.mouse.move(
        page.mouse._x + random.randint(50, 100),
        page.mouse._y + random.randint(100, 200),
        {'steps': 3},
    )
    await _press_hovered(page, ".pass-button-submit")
    await asyncio.sleep(2)

    # rotate the captcha if one is shown
    await _solve_rotation_captcha(page)

    # a forced SMS verification dialog cannot be solved here; dismiss it
    no_in = await page.querySelector(".pass-forceverify-wrapper .forceverify-header-a")
    if no_in:
        print("有短信驗證碼廢了")
        await no_in.click()


async def main(username, password, width, height):
    """Log in to Baidu Index and return the session cookies.

    Args:
        username: account name typed into the login form.
        password: password typed into the login form.
        width: browser window and viewport width in pixels.
        height: browser window and viewport height in pixels.

    Returns:
        The list returned by ``page.cookies()``, or ``None`` when the page
        still shows the login label afterwards.
    """
    browser = await launch({
        'headless': False,  # may be set to True to run headless
        'slowMo': 1.3,
        'userDataDir': './userdata',
        'args': [
            f'--window-size={width},{height}',
            '--disable-extensions',
            '--hide-scrollbars',
            '--disable-bundled-ppapi-flash',
            '--mute-audio',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-gpu',
            '--disable-infobars',
        ],
        'dumpio': True,
    })
    # Close the browser even when a step below raises.
    try:
        page = await browser.newPage()
        # set the browser user agent
        await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36")
        # set the viewport size
        await page.setViewport({'width': width, 'height': height})
        # inject the anti-detection scripts
        await page_evaluate(page)
        await page.goto('http://index.baidu.com/v2/index.html')
        await page.waitFor(2000)

        # Log in only when the page shows the login label; otherwise the
        # cookies kept in the user data directory are still valid.
        elements = await _read_property(page, '.username-text', 'textContent')
        if (elements or '').strip() in _LOGIN_LABELS:
            await _login(page, username, password)

        # wait 2 seconds
        await asyncio.sleep(2)
        cookies = await page.cookies()
        # in headless mode, print the user name to see whether login worked
        elements = await _read_property(page, '.username-text', 'textContent')
        print(elements)
    finally:
        await browser.close()

    if (elements or '').strip() in _LOGIN_LABELS:
        return None
    return cookies


async def try_validation(page, distance=308):
    """Drag the captcha slider *distance* pixels to the right.

    The drag is split into a long first leg and a short final leg with a
    pause in between, to look more like a human.
    """
    # The final leg is 10 px, or the whole distance for shorter drags so
    # that the slider never has to move backwards first.
    distance2 = max(0, min(10, distance))
    distance1 = distance - distance2
    btn_position = await page.evaluate('''
       () =>{
        return {
         x: document.querySelector('.vcode-spin-button').getBoundingClientRect().x,
         y: document.querySelector('.vcode-spin-button').getBoundingClientRect().y,
         width: document.querySelector('.vcode-spin-button').getBoundingClientRect().width,
         height: document.querySelector('.vcode-spin-button').getBoundingClientRect().height
         }}
        ''')
    # start the drag from the centre of the button
    x = btn_position['x'] + btn_position['width'] / 2
    y = btn_position['y'] + btn_position['height'] / 2
    await page.mouse.move(x, y)
    await page.mouse.down()
    await page.mouse.move(x + distance1, y, {'steps': 30})
    await page.waitFor(800)
    await page.mouse.move(x + distance1 + distance2, y, {'steps': 20})
    await page.waitFor(800)
    await page.mouse.up()


def baidu_login(username, password, width, height):
    """Run main() to completion on the current event loop and return its result."""
    return asyncio.get_event_loop().run_until_complete(main(username, password, width, height))


if __name__ == "__main__":
    width, height = 1366, 768
    username = '你的賬戶'
    password = '你的密碼'
    cookies = baidu_login(username, password, width, height)
    print(cookies)
    if cookies:
        # Cookie header form: "name=value;name=value;..."
        string_cookies = "".join(f"{each['name']}={each['value']};" for each in cookies)
        print(string_cookies)
最后
完整的項目放在https://github.com/ShortCJL/RotateCode,注意:需要把模型下載下來解壓到根目錄