How to bypass TikTok's bot detection (selenium) - python

I'm currently trying to make a bot for TikTok that uploads a video every hour on multiple accounts at the same time, using the same video. But I'm currently stuck at the login screen: it tells me "too many login attempts. Try again later" every time Selenium tries to log in. The weird thing is that it worked 5 days ago, perfectly fine without any errors, but now this happens.
Does anybody have any Idea why this happens?
Here's the code, if there is anything else you need to know feel free to ask:
import os
import time
from selenium_profiles.driver import driver as mydriver
from selenium_profiles.profiles import profiles
from selenium_profiles.scripts.driver_utils import actions
import undetected_chromedriver as uc
from multiprocessing import Process
from openpyxl import Workbook, load_workbook
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium_stealth import stealth
import requests
import pyautogui
import random
wb = load_workbook("backend.xlsx")
ws = wb.active
accountCounter = 1
#These Variables are changeable
accountAmount = 1
hashtags = "#rich #money #luxury #millionaire #business #luxurylifestyle #lifestyle"
upload_cooldown = 30
######
mobile_emulation = {
"deviceName": "Nokia N9"
}
s = Service("chromedriver_bypass.exe")
options = Options()
mydriver = mydriver()
profile = profiles.Android()
os.chdir("Vids")
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--profile-dictonary=Default")
options.add_experimental_option("mobileEmulation", mobile_emulation)
#user_agent = "Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36"
def findPasswords():
    """Collect one password per account from column B of the worksheet.

    Row 1 is assumed to be a header, so account data starts at row 2.
    Reads the module-level ``ws`` sheet and ``accountAmount`` count.
    """
    return [ws[f"B{row}"].value for row in range(2, accountAmount + 2)]
def findUsers():
    """Collect one username per account from column A of the worksheet.

    Row 1 is assumed to be a header, so account data starts at row 2.
    Reads the module-level ``ws`` sheet and ``accountAmount`` count.
    """
    return [ws[f"A{row}"].value for row in range(2, accountAmount + 2)]
def createWebDrivers(accAmount):
    """Start one selenium-profiles browser per account and return the drivers.

    Uses the module-level ``mydriver`` launcher and Android ``profile``;
    each call to ``start`` spawns a fresh browser instance.
    """
    return [mydriver.start(profile, uc_driver=False) for _ in range(accAmount)]
def createWaitDrivers(webDrivers):
    """Pair every driver with a 7-second explicit-wait wrapper.

    Returns a list of ``WebDriverWait`` objects in the same order as
    ``webDrivers``, so index i of both lists refers to the same account.
    """
    return [WebDriverWait(browser, 7) for browser in webDrivers]
webDrivers = createWebDrivers(accountAmount)
waitDrivers = createWaitDrivers(webDrivers)
passwords = findPasswords()
users = findUsers()
def login(webDriver, waitDriver, user, password):
    """Attempt to log one account into TikTok's email/password login form.

    Simulates human behaviour: a touch-pointer tap on the username field
    (the script runs under mobile emulation), per-character typing with
    randomized delays, and long pauses before submitting.  Blocks on
    console input so the operator can dismiss popups first.

    NOTE(review): the absolute XPaths below are tied to TikTok's current
    page layout and will break silently when the DOM changes — confirm
    they still match before relying on this.
    """
    #Fixing the Bot detection
    webDriver.get("https://www.tiktok.com/login/phone-or-email/email")
    #webDriver.get("https://bot.sannysoft.com/")
    time.sleep(random.uniform(1,3))
    # Manual step: operator must close cookie banners / popups before typing.
    y = input("Click away everything on the screen, if you have done so please write 'y'")
    if "y" in y:
        # username
        userBox = webDriver.find_element(By.XPATH, "/html/body/div[2]/div[1]/div/div[1]/form/div[1]/input")
        # Tap the middle of the username input with a touch pointer action.
        userBoxLocation = actions.mid_location(userBox)
        action = actions.touch_action_chain()
        action.pointer_action.move_to_location(userBoxLocation["x"], userBoxLocation["y"])
        action.pointer_action.pointer_down()
        action.perform()
        time.sleep(random.uniform(1,3))
        # Type the username one character at a time with human-like delays.
        for char in user:
            waitDriver.until(EC.presence_of_element_located((By.NAME, "username"))).send_keys(char)
            time.sleep(random.uniform(.01, .1))
        time.sleep(10)
        # password
        waitDriver.until(EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/div[1]/div/div[1]/form/div[2]/div/input"))).click()
        time.sleep(random.uniform(1,3))
        # Type the password the same way, slightly slower per keystroke.
        for char in password:
            waitDriver.until(EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/div[1]/div/div[1]/form/div[2]/div/input"))).send_keys(char)
            time.sleep(random.uniform(.1, .2))
        time.sleep(15)
        # Submit the login form.
        waitDriver.until(EC.presence_of_element_located((By.XPATH,"/html/body/div[2]/div[1]/div/div[1]/form/button"))).click()
I tried changing the cdc_ variable in the chromedriver. I tried using undetected_selenium. I tried using selenium-stealth (currently in the code too), changing user agents etc. Also it is important to state that I use an excel worksheet to retrieve account data (via openpyxl)

Related

Selenium can't pull the products' prices from a website

I have a program that reads data from a website using selenium. In this case I want to get the prices of the perfumes.
for i in driver.find_elements_by_css_selector("spam.items_show_price_text"):
print(i.text)
When I run the program it prints for me 139 empty lines(139 is the number of the perfumes that are in the website).
I guess it has to do with the "span.items_show_price_text" parameter of the driver.find_elements_by_css_selector function.
The whole code (there is no need to understand the while loop; it basically runs the program for all of the pages, because the page loads more items as you scroll down)
from datetime import date
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
urlM = 'https://www.myperfume.co.il/155567-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%92%D7%91' \
'%D7%A8?order=up_title&page=0'
urlF = 'https://www.myperfume.co.il/155569-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%90%D7%99' \
'%D7%A9%D7%94?order=up_title&page=0'
scope = ["https://spreadsheets.google.com/feeds", 'https://www.googleapis.com/auth/spreadsheets',
"https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
client = gspread.authorize(creds)
spreadsheet = client.open("Perfumes")
options = ChromeOptions()
options.headless = True
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
namesM = []
pricesM = []
namesMe = ' '
lenM = 0
num = 0
while len(namesMe) != 0:
urlM = urlM[:-1] + str(int(urlM[-1]) + 1)
# ---=MALE=---
driver.get(urlM)
# names
for i in driver.find_elements_by_css_selector("h3.title.text-center"):
lenM += 1
namesM.append(i.text)
# prices
for i in driver.find_elements_by_css_selector("spam.items_show_price_text"):
print(i.text)
print(i.text)
As I see on that site, the locator for actual prices (after the price off) will be
'span.price'
So, instead of
for i in driver.find_elements_by_css_selector("spam.items_show_price_text"):
Try using
for i in driver.find_elements_by_css_selector("span.price"):
You should also add waits/ delays there.
Again, it is preferable to use Expected Conditions explicit waits for that.
And don't mix up spam with span :)

send() function in selenium python is not working

import os, time, keyword, re, getpass, urllib, requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from urllib import request
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
Decision_from = input("Enter the Date From (DD-MM-YYYY):- ")
download_dir = "d:/dirName"
profile = webdriver.FirefoxProfile()
profile.set_preference("plugin.state.flash", 0)
profile.set_preference("plugin.state.java", 0)
profile.set_preference("media.autoplay.enabled", False)
profile.set_preference("browser.download.folderList", 2)
# whether or not to show the Downloads window when a download begins.
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.dir", download_dir)
profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
"application/octet-stream" +
",application/zip" +
",application/x-rar-compressed" +
",application/x-gzip" +
",application/msword")
driver = webdriver.Firefox(firefox_profile=profile, executable_path=r'd:/geckodriver.exe')
driver.get("https://dsscic.nic.in/cause-list-report-web/view-decision/1")
driver.find_element_by_class_name('form-control').send_keys(Decision_from)
driver.find_element_by_xpath("//*[#id='submit']").click()
driver.find_element_by_xpath("//*[#id='page_length']/option[text()='5000']").click()
rows = len(driver.find_elements_by_xpath("//*[#id='wrapperContent']/div/div/div/section/div/div/div/table/tbody[2]/tr"))
columns = len(driver.find_elements_by_xpath("//*[#id='wrapperContent']/div/div/div/section/div/div/div/table/tbody[2]/tr/td"))
suni="//*[#id='wrapperContent']/div/div/div/section/div/div/div/table/tbody[2]/tr["
ashu="]/td[9"
pansing="]/form/button"
# Walk every table row (row 1 is an empty header row and is skipped),
# click the download button in column 9, then drive the popup window.
for t_row in range(1, (rows + 1)):
    if t_row == 1:
        print("Hello Jaaneman First Row Empty")
    else:
        # Assemble the absolute XPath for this row's column-9 button.
        Final_Path = suni + str(t_row) + ashu + pansing
        driver.find_element_by_xpath(Final_Path).click()
        handles = driver.window_handles
        size = len(handles)
        parent_handle = driver.current_window_handle
        if handles[1] != parent_handle:
            # Work in the popup window that the click opened.
            driver.switch_to.window(handles[1])
            # Bug fix: ActionChains only queues actions — nothing is sent
            # to the browser until perform() is called at the end of the chain.
            ActionChains(driver).key_down(Keys.ALT).send_keys("F", "A").key_up(Keys.ALT).send_keys(Keys.HOME).perform()
            driver.close()
            # Return control to the original window before the next row.
            driver.switch_to.window(parent_handle)
The whole program runs properly, but the line ActionChains(driver).key_down(Keys.ALT).send_keys("F", "A").key_up(Keys.ALT).send_keys(Keys.HOME) is not working. How can I resolve it?
I'm new to Selenium with Python. Please guide me.
You need to add .perform() at the end:
ActionChains(driver).key_down(Keys.ALT).send_keys("F", "A").key_up(Keys.ALT).send_keys(Keys.HOME).perform()
From the documentation:
When you call methods for actions on the ActionChains object, the actions are stored in a queue in the ActionChains object. When you call perform(), the events are fired in the order they are queued up.

Unable to scrape google images selenium

I have the following script which i want it to scrapes google images. It clicks on the image first and then clicks on next (>) button to switch to the next image.
It downloads the first image, but when it's the turn of the second image it throws an error.
Traceback (most recent call last):
File "c:/Users/intel/Desktop/Scrappr/image_scrape.pyw", line 40, in <module>
attribute_value = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.CLASS_NAME, 'n3VNCb'))).get_attribute("src")
File "C:\Users\intel\AppData\Local\Programs\Python\Python38\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
My code :
import requests
import shutil
import time
import urllib
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as Soup
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/80.0.3987.132 Safari/537.36'
options = Options()
#options.add_argument("--headless")
options.add_argument(f'user-agent={user_agent}')
options.add_argument("--disable-web-security")
options.add_argument("--allow-running-insecure-content")
options.add_argument("--allow-cross-origin-auth-prompt")
driver = webdriver.Chrome(executable_path=r"C:\Users\intel\Downloads\setups\chromedriver.exe", options=options)
driver.get("https://www.google.com/search?q=mac+beautiful+ui&tbm=isch&ved=2ahUKEwiL3ILMveToAhWGCHIKHVPNAScQ2-cCegQIABAA&oq=mac+beautiful+ui&gs_lcp=CgNpbWcQAzoECAAQQzoCCAA6BQgAEIMBOgYIABAFEB46BggAEAgQHlDPI1iEUWCgU2gAcAB4AIAByAKIAd8dkgEHMC40LjkuM5gBAKABAaoBC2d3cy13aXotaW1n&sclient=img&ei=Q9-TXsuuMoaRyAPTmoe4Ag&bih=657&biw=1360")
driver.find_element_by_class_name("rg_i").click()
i = 0
while i < 10:
i += 1
time.sleep(5)
attribute_value = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'img.n3VNCb'))).get_attribute("src")
print(attribute_value)
resp = requests.get(attribute_value, stream=True)
local_file = open(r'C:/users/intel/desktop/local_image'+ str(i) + '.jpg', 'wb')
resp.raw.decode_content = True
shutil.copyfileobj(resp.raw, local_file)
del resp
driver.find_element_by_xpath("""//*[#id="Sva75c"]/div/div/div[3]/div[2]/div/div[1]/div[1]/div/div[1]/a[2]/div""").click()
I've tidied up and refactored a bit your code. The final result is capable of grabbing n amount of images for keywords of your choice (see SEARCH_TERMS):
import base64
import os
import requests
import time
from io import BytesIO
from PIL import Image
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
CHROME_DRIVER_LOCATION = r'C:\Users\intel\Downloads\setups\chromedriver.exe'
SEARCH_TERMS = ['very', 'hot', 'chicks']
TARGET_SAVE_LOCATION = os.path.join(r'c:\test', '_'.join([x.capitalize() for x in SEARCH_TERMS]), r'{}.{}')
if not os.path.isdir(os.path.dirname(TARGET_SAVE_LOCATION)):
os.makedirs(os.path.dirname(TARGET_SAVE_LOCATION))
def check_if_result_b64(source):
    """Detect whether *source* is a base64 data URI.

    Returns the image type (e.g. ``'png'`` for ``data:image/png;base64,...``)
    when it is, otherwise ``False``.
    """
    header = source.partition(',')[0]
    if not (header.startswith('data') and ';base64' in header):
        return False
    return header.replace('data:image/', '').replace(';base64', '')
def get_driver():
    """Launch Chrome with a desktop UA and relaxed security flags, then
    open Google Images for the module-level SEARCH_TERMS and return the driver.
    """
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) ' \
                 'Chrome/80.0.3987.132 Safari/537.36'
    opts = Options()
    # opts.add_argument("--headless")  # left off so the session is visible
    for flag in (f'user-agent={user_agent}',
                 "--disable-web-security",
                 "--allow-running-insecure-content",
                 "--allow-cross-origin-auth-prompt"):
        opts.add_argument(flag)
    browser = webdriver.Chrome(executable_path=CHROME_DRIVER_LOCATION, options=opts)
    browser.get(f"https://www.google.com/search?q={'+'.join(SEARCH_TERMS)}&source=lnms&tbm=isch&sa=X")
    return browser
driver = get_driver()
first_search_result = driver.find_elements_by_xpath('//a/div/img')[0]
first_search_result.click()
right_panel_base = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, f'''//*[#data-query="{' '.join(SEARCH_TERMS)}"]''')))
first_image = right_panel_base.find_elements_by_xpath('//*[#data-noaft="1"]')[0]
magic_class = first_image.get_attribute('class')
image_finder_xp = f'//*[#class="{magic_class}"]'
# initial wait for the first image to be loaded
# this part could be improved but I couldn't find a proper way of doing it
time.sleep(3)
# initial thumbnail for "to_be_loaded image"
thumbnail_src = driver.find_elements_by_xpath(image_finder_xp)[-1].get_attribute("src")
for i in range(10):
# issue 4: All image elements share the same class. Assuming that you always click "next":
# The last element is the base64 encoded thumbnail version is of the "next image"
# [-2] element is the element currently displayed
target = driver.find_elements_by_xpath(image_finder_xp)[-2]
# you need to wait until image is completely loaded:
# first the base64 encoded thumbnail will be displayed
# so we check if the displayed element src match the cached thumbnail src.
# However sometimes the final result is the base64 content, so wait is capped
# at 5 seconds.
wait_time_start = time.time()
while (target.get_attribute("src") == thumbnail_src) and time.time() < wait_time_start + 5:
time.sleep(0.2)
thumbnail_src = driver.find_elements_by_xpath(image_finder_xp)[-1].get_attribute("src")
attribute_value = target.get_attribute("src")
print(attribute_value)
# issue 1: if the image is base64, requests get won't work because the src is not an url
is_b64 = check_if_result_b64(attribute_value)
if is_b64:
image_format = is_b64
content = base64.b64decode(attribute_value.split(';base64')[1])
else:
resp = requests.get(attribute_value, stream=True)
temp_for_image_extension = BytesIO(resp.content)
image = Image.open(temp_for_image_extension)
image_format = image.format
content = resp.content
# issue 2: if you 'open' a file, later you have to close it. Use a "with" pattern instead
with open(TARGET_SAVE_LOCATION.format(i, image_format), 'wb') as f:
f.write(content)
# issue 3: this Xpath is bad """//*[#id="Sva75c"]/div/div/div[3]/div[2]/div/div[1]/div[1]/div/div[1]/a[2]/div""" if page layout changes, this path breaks instantly
svg_arrows_xpath = '//div[#jscontroller]//a[contains(#jsaction, "click:trigger")]//*[#viewBox="0 0 24 24"]'
next_arrow = driver.find_elements_by_xpath(svg_arrows_xpath)[-3]
next_arrow.click()
Disclaimer: I doubt that Google allows scraping on Search. You should check out https://www.google.com/robots.txt to find out.
That being said, I think there is a problem in your WebDriverWait method, though I am not sure what exactly it is. Since you already have your driver wait before that with time.sleep, I just tried to find the element directly, and it worked:
i = 0
while i < 10:
i += 1
time.sleep(5)
attribute_value = driver.find_element_by_css_selector("img.n3VNCb").get_attribute("src") # NEW LINE
print(attribute_value)
resp = requests.get(attribute_value, stream=True)
local_file = open(r'C:/users/intel/desktop/local_image'+ str(i) + '.jpg', 'wb')
resp.raw.decode_content = True
shutil.copyfileobj(resp.raw, local_file)
del resp
driver.find_element_by_xpath("""//*[#id="Sva75c"]/div/div/div[3]/div[2]/div/div[1]/div[1]/div/div[1]/a[2]/div""").click()

Any way to leave selenium web pages open after running your script completely in python?

I've been looking around the web for a solution to this. The issue I'm having is that my web pages close immediately after they open up; this is because the script has finished. But in my case I want them to stay open so I can use them. Is there a certain way to leave the pages open, even after the script finishes? This is my code...
import selenium.webdriver as webdriver
from selenium.webdriver.chrome.options import Options
chrome_path = r"C:\Users\Michael\PycharmProjects\FlashCop\chromedriver.exe"
times = ""
start_page = ""
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
def start_pages(target_page):
    """Open one Chrome window per requested page, all pointed at *target_page*.

    Bug fix: the original called ``webdriver.Chrome()`` without passing
    ``chrome_options``, so the ``"detach"`` experimental option configured at
    module level was never applied and every browser closed as soon as the
    script ended.  Passing the options (and the module-level ``chrome_path``)
    keeps the windows alive after the script exits.
    """
    # "detach" keeps the browser open after the driver process ends.
    chrome_options.add_experimental_option("detach", True)
    for _ in range(len(page_number)):
        driver = webdriver.Chrome(chrome_path, chrome_options=chrome_options)
        driver.get(target_page)
chrome_options.add_experimental_option("detach", True)
while times == "":
times = input("How many pages do you want?\n")
url = input("Yeezy Supply or Adidas?""\nEither 'YS' or 'Adidas'\n")
url_choice = url.lower()
page_number = list()
for i in range(0, int(times)):
page_number.append(times)
if url_choice == 'ys':
start_page = 'https://yeezysupply.com/'
start_pages(start_page)
elif url_choice == 'adidas':
start_page = 'https://www.adidas.com/yeezy'
start_pages(start_page)
The experimental option detach is what will keep the browser open. You define this option, but haven't added it to your driver.
def start_pages(target_page):
    """Open ``len(page_number)`` detached Chrome windows at *target_page*.

    Passing ``chrome_options`` (which carries the ``"detach"`` experimental
    option) is what keeps each window open after the script finishes.
    """
    # Hoisted out of the loop: the option is loop-invariant, so setting it
    # once on the shared Options object is enough for every window.
    chrome_options.add_experimental_option("detach", True)
    for _ in range(len(page_number)):
        driver = webdriver.Chrome(chrome_path, chrome_options=chrome_options)
        driver.get(target_page)

Not able to select value - error "Select only works on <select> elements, not on <a>"

I am trying to retrieve all possible year, model and make from [https://www.osram-americas.com/en-us/applications/automotive-lighting-systems/Pages/lrgmain.aspx]
from selenium import webdriver
import time
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
driver = webdriver.Chrome()
driver.get("https://www.osram-americas.com/en-us/applications/automotive-lighting-systems/Pages/lrgmain.aspx")
# Switch to new window opened
driver.switch_to.window(driver.window_handles[-1])
# Close the new window
driver.close()
# Switch back to original browser (first window)
driver.switch_to.window(driver.window_handles[0])
el = driver.find_element_by_css_selector("div#fldYear a.sbToggle")
el.click()
select = Select(el)
select.select_by_visible_text("2016")
However, if I try to select the year 2016, it gives this error:
Select only works on <select> elements, not on <a>
You don't actually need selenium and automate any visual interactions to get the year+make+model data from the page and can approach the problem with requests only making appropriate GET requests:
# -*- coding: utf-8 -*-
from collections import defaultdict
from pprint import pprint
import requests
year = 2016
d = defaultdict(lambda: defaultdict(list))
with requests.Session() as session:
session.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
session.get("https://www.osram-americas.com/en-us/applications/automotive-lighting-systems/Pages/lrgmain.aspx")
while True:
response = session.get("https://www.osram-americas.com/_Layouts/Sylvania.Web.LRGHandler/LRGHandler.ashx",
params={"rt": "fetchmake", "year": str(year)})
data = response.json()
if not data: # break if no makes in a year
break
for make in data:
response = session.get("https://www.osram-americas.com/_Layouts/Sylvania.Web.LRGHandler/LRGHandler.ashx",
params={"rt": "fetchmodel", "year": str(year), "make": make["Id"]})
for model in response.json():
d[year][make["Value"]].append(model["Value"])
year -= 1
pprint(dict(d))
The problem with your current approach is that you are finding the a element and trying to use it as a select element. Select class will only work with select elements.
Note that in this case, it's easier to get to the invisible select element and get the years from its options directly:
options = [option.get_attribute("innerText") for option in driver.find_elements_by_css_selector("select#ddlYear option")[1:]]
print(options)
The [1:] slice here is to skip the very first Select Year element.
Complete working code:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get("https://www.osram-americas.com/en-us/applications/automotive-lighting-systems/Pages/lrgmain.aspx")
wait = WebDriverWait(driver, 10)
toggle = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#fldYear a.sbToggle")))
toggle.click()
options = [option.get_attribute("innerText") for option in driver.find_elements_by_css_selector("select#ddlYear option")[1:]]
print(options)
Prints:
[u'2016', u'2015', u'2014', u'2013', u'2012', u'2011', u'2010', u'2009', u'2008', u'2007', u'2006', u'2005', u'2004', u'2003', u'2002', u'2001', u'2000', u'1999', u'1998', u'1997', u'1996', u'1995', u'1994', u'1993', u'1992', u'1991', u'1990', u'1989', u'1988', u'1987', u'1986', u'1985', u'1984', u'1983', u'1982', u'1981', u'1980', u'1979', u'1978', u'1977', u'1976', u'1975', u'1974', u'1973', u'1972', u'1971', u'1970', u'1969', u'1968', u'1967', u'1966', u'1965', u'1964', u'1963', u'1962', u'1961', u'1960', u'1959', u'1958', u'1957', u'1956', u'1955']

Categories