Threading cannot handle the following tasks - Python

I tried to use Python's threading module to log in with many users, but I ran into a problem: my code can't handle all of the tasks. I have 10 users, but the script stops after 2 user logins and the rest are never called. What can I add to my code to fix this?
import threading
from selenium import webdriver
import time
import numpy as np
from csv import reader

def Login_browser(i):
    id_str = ids_pass_list[i][0]
    id_pass = ids_pass_list[i][1]
    print(i)
    print("Login Id: ", id_str)
    print("Login Password: ", id_pass)
    Options = webdriver.ChromeOptions()
    Options.add_argument('--app-https://google.com')
    driver = webdriver.Chrome(options=Options)
    driver.get('http://localhost/login2/index.php')
    time.sleep(1)
    try:
        driver.get('http://localhost/login2/index.php')
        time.sleep(1)
        email_field = driver.find_element_by_name("userid")
        email_field.send_keys(id_str)
        time.sleep(3)
        password_field = driver.find_element_by_name("pass")
        password_field.send_keys(id_pass)
        time.sleep(3)
        submit = driver.find_element_by_xpath("/html/body/center/form/table/tbody/tr[3]/td/button[1]")
        submit.click()
        time.sleep(30)
        driver.quit()
    except:
        time.sleep(10)

Page_Thread = 2
threads = []
with open('user.csv', 'r') as read_obj:
    csv_reader = reader(read_obj)
    list_of_rows = list(csv_reader)
    total_Len = len(list_of_rows)
    ids_pass_list = list_of_rows
for i in range(Page_Thread):
    threads += [threading.Thread(target=Login_browser, args={i},)]
for t in threads:
    t.start()
for t in threads:
    t.join
print('succses')
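The loop only ever creates Page_Thread = 2 threads, so only the first two rows of user.csv are scheduled and the other eight users never run. Two smaller bugs compound this: args={i} passes a set where a tuple is expected, and t.join without parentheses never actually waits. Here is a minimal sketch of the driver code, reusing the Login_browser function from the question and starting one thread per CSV row:

import threading
from csv import reader

# Read every user up front, then start one thread per row instead of
# capping the count at Page_Thread = 2.
with open('user.csv', 'r') as read_obj:
    ids_pass_list = list(reader(read_obj))

threads = []
for i in range(len(ids_pass_list)):
    t = threading.Thread(target=Login_browser, args=(i,))  # args must be a tuple
    threads.append(t)
    t.start()

for t in threads:
    t.join()  # note the parentheses: t.join without () is a no-op

print('success')

With 10 users this opens 10 Chrome windows at once; a concurrent.futures.ThreadPoolExecutor with a modest max_workers would bound that if it becomes a problem.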

Related

Python ProcessPoolExecutor doesn't want to execute the function

I'm using Python 3 and mcstatus. My source code is:
from mcstatus import JavaServer
import concurrent.futures
import json

imput_file_name = "dockerformcstatus\dockerMyScript\pyscript\input.txt"

def scaner(data):
    print("Processing data: ", data)
    global serverJsonData
    global timeout
    server = JavaServer.lookup(data, timeout)
    print(server)
    try:
        serverJson = server.status().raw
        print(serverJson)
        serverJsonData.append(serverJson)
    except:
        print("can't connect")

if __name__ == "__main__":
    serverJsonData = []
    timeout = 5
    data = None
    with open(imput_file_name, "r") as f:
        data = f.readlines()
    #print(data)
    with concurrent.futures.ProcessPoolExecutor() as executor:
        executor.map(scaner, data)
    #scaner("168.119.4.46:25619")
    with open("serverdata.json", "w") as f:
        f.write(json.dumps(serverJsonData))
The problem is that if I use with concurrent.futures.ProcessPoolExecutor() as executor: executor.map(scaner, data), it doesn't write into the list serverJsonData, but if I run it directly as scaner("168.119.4.46:25619"), it works.
I tried to debug it but couldn't find the reason for this "bug".
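With ProcessPoolExecutor, each scaner call runs in a separate worker process that gets its own copy of serverJsonData, so appends made in the workers never reach the parent's list; that is the difference from calling scaner directly. The usual fix is to return the result from the worker and collect it from executor.map in the parent. A minimal sketch of that approach, keeping the mcstatus calls from the question (the input path is shortened here for readability):

from mcstatus import JavaServer
import concurrent.futures
import json

def scaner(data):
    # Runs in a worker process: return the result instead of appending
    # to a global, because each process has its own memory.
    try:
        server = JavaServer.lookup(data.strip(), 5)  # 5 s timeout, as in the question
        return server.status().raw
    except Exception:
        print("can't connect:", data.strip())
        return None

if __name__ == "__main__":
    with open("input.txt", "r") as f:
        targets = f.readlines()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(scaner, targets)  # yields the workers' return values
    serverJsonData = [r for r in results if r is not None]  # drop failed lookups
    with open("serverdata.json", "w") as f:
        json.dump(serverJsonData, f)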

I want to resume my Python script from the state it was stopped at. How do I do that?

I'm inserting my code below. I couldn't really do anything on this topic, I'm still learning so that isn't helping either.
Someone suggested using pickle, JSON, or SQL but I'm not familiar with any of them.
I'm using Replit, so it automatically restarts the bot.
import praw
import time
import random
import requests
import sys
from sys import exit

# NOTE: `reddit` is used below but never initialized in the posted code;
# a praw.Reddit(...) setup (credentials omitted) is assumed to exist.
print("Starting Magic............")
print(reddit.user.me())
REDDIT_USERNAME = (reddit.user.me())
response = requests.get("https://www.reddit.com/user/{}/about.json".format(REDDIT_USERNAME), headers={'User-agent': 'hiiii its {}'.format(REDDIT_USERNAME)}).json()
if "error" in response:
    if response["error"] == 404:
        print("account {} is shadowbanned. poor bot :( shutting down the script...".format(REDDIT_USERNAME))
        sys.exit()
    else:
        print(response)
else:
    print("{} is not shadowbanned! We think..".format(REDDIT_USERNAME))
title = input("Enter an epic title: ")
title2 = input("Enter an epic title: ")
url = input("Enter a sassy link: ")
url2 = input("Enter a sassy link: ")
print("Reading reddit list")
subredit_list = open("data.txt", "r")
subreddits = subredit_list.read().split(',')
for subreddit in subreddits:
    try:
        print(subreddit)
        reddit.validate_on_submit = True
        submission = reddit.subreddit(subreddit).submit(title, url=url)
        time.sleep(10)
        print("done")
    except Exception as err:
        print("Exception for subreddit {}, {}".format(subreddit, err))
    t = random.randint(615, 815)
    seconds = "Sleeping for {} seconds before proceeding".format(t)
    print(seconds)
    time.sleep(t)
print("Reading reddit list")
subredit_list = open("data.txt", "r")
subreddits = subredit_list.read().split(',')
for subreddit in subreddits:
    try:
        print(subreddit)
        reddit.validate_on_submit = True
        submission = reddit.subreddit(subreddit).submit(title2, url=url2)
        time.sleep(10)
        print("done")
    except Exception as err:
        print("Exception for subreddit {}, {}".format(subreddit, err))
    t = random.randint(505, 825)
    seconds = "Sleeping for {} seconds before proceeding".format(t)
    print(seconds)
    time.sleep(t)
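Since Replit restarts the script from the top, the usual trick is to write progress to a small file after every post and read it back on startup. Below is a minimal sketch using JSON (one of the options already suggested); the state.json file name and next_index key are placeholders, not part of the original code:

import json
import os

STATE_FILE = "state.json"  # hypothetical file holding the saved progress

def load_state():
    # Resume from the last saved position, or start fresh on the first run
    if os.path.exists(STATE_FILE):
        with open(STATE_FILE) as f:
            return json.load(f)
    return {"next_index": 0}

def save_state(state):
    with open(STATE_FILE, "w") as f:
        json.dump(state, f)

state = load_state()
subreddits = open("data.txt").read().split(",")
for i in range(state["next_index"], len(subreddits)):
    # ... submit to subreddits[i] as in the script above ...
    state["next_index"] = i + 1
    save_state(state)  # persist after every post so a restart resumes here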

How can I add threading to my Python code?

Below is my attempt at creating a username availability checker with proxies. So far it works as intended; the only problem is that it's slow. I tried to implement threads, but it made no difference, as I'm not sure whether I'm doing it right. I used the concurrent.futures and threading libraries.
Is there a better way to code this kind of program, or are there any other suggestions?
Thanks in advance
import requests
import json
import ctypes
import colorama
from colorama import Fore
from datetime import datetime
import os

os.system("cls")
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
colorama.init()
url = "https://link"

def grab_proxies():
    proxylist = []
    prx = open('proxy.txt', 'r')
    prx = prx.readlines()
    for proxy in prx:
        proxy = proxy.rstrip("\n")
        proxylist.append(proxy)
    return proxylist

prlist = grab_proxies()

def grab_usernames():
    userlist = []
    users = open('userlist.txt', 'r')
    users = users.readlines()
    for user in users:
        user = user.rstrip("\n")
        userlist.append(user)
    return userlist

ulist = grab_usernames()
found = 0
pc = 0
uc = 0
for i in range(0, len(prlist)):
    ctypes.windll.kernel32.SetConsoleTitleW(f"[# Checker] | Counter: %s - Found: %s - Current Proxy: %s - Started at: %s" % (i, found, prlist[pc], current_time))
    try:
        req = requests.post(url, headers=headers, data={"requested_username": ulist[uc], "xsrf_token": "F0kpyvjJgeBtsOk5Gl6Jvg"}, proxies={'http': prlist[pc], 'https': prlist[pc]}, timeout=2)
        response = req.json()
        #print(response,req.status_code)
        #print(response)
        #print(type(response))
        if (response['reference']['status_code'] == 'TAKEN'):
            #rd = response['errors']['username'][0]['code']
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Taken{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            #print(ulist[uc]+" Taken")
            uc += 1
        elif (response['reference']['status_code'] == 'OK'):
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTGREEN_EX}Available{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            #print(ulist[uc]+" Available")
            f = open("found.txt", "a")
            f.write(ulist[uc] + "\n")
            f.close()
            found += 1
            uc += 1
        elif (response['reference']['status_code'] == 'INVALID_BEGIN'):
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Invalid Username{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            uc += 1
        elif (response['reference']['status_code'] == 'DELETED'):
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Deleted{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            uc += 1
        else:
            print(response)
    except:
        #print(prlist[pc]+ " Going to next proxy")
        pc += 1
        pass
    #break
x = input("Finished!.. press enter to exit")
You could use https://github.com/encode/requests-async to do your requests in an async way
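Alternatively, the existing requests code can be fanned out with concurrent.futures.ThreadPoolExecutor, which suits I/O-bound work like HTTP calls. A rough sketch under that approach; url, headers, and max_workers are placeholders to adapt from the script above:

import concurrent.futures
import requests

url = "https://link"  # placeholder endpoint, as in the question
headers = {}          # placeholder: copy the real headers from the script

def check_username(user, proxy):
    # Each worker checks one username through one proxy
    try:
        req = requests.post(
            url,
            headers=headers,
            data={"requested_username": user},
            proxies={"http": proxy, "https": proxy},
            timeout=2,
        )
        return user, req.json()["reference"]["status_code"]
    except Exception:
        return user, "PROXY_ERROR"

users = [u.strip() for u in open("userlist.txt")]
proxies = [p.strip() for p in open("proxy.txt")]

with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Cycle through the proxy list so every username gets a proxy
    futures = [
        executor.submit(check_username, user, proxies[i % len(proxies)])
        for i, user in enumerate(users)
    ]
    for fut in concurrent.futures.as_completed(futures):
        user, status = fut.result()
        print(user, status)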

Python Selenium Script:

I wrote a script to save LinkedIn information such as name, last name, university attended, and most importantly the link to the LinkedIn profile. My script uses Selenium and chromedriver to log in to LinkedIn and then scrape. My problem is with saving profile links: the links aren't being scraped properly. Here's my code:
import csv
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.keys import Keys
import parameters
import re

class LinkedIn():
    def __init__(self):
        self.driver = webdriver.Chrome()
        self.people_ls_dic = []
        self.csv_name_colums = ["name", "degree_connection", "zawod", "region", "opis", "firma", "link"]

    def login(self):
        self.driver.get("http://www.linkedin.com/login")
        sleep(3)
        username = self.driver.find_element_by_name('session_key')
        username.send_keys(parameters.linkedin_username)
        password = self.driver.find_element_by_name('session_password')
        password.send_keys(parameters.linkedin_password)
        sign_in_button = self.driver.find_elements_by_xpath('//*[@class="btn__primary--large from__button--floating mercado-button--primary"]')
        sign_in_button[0].click()
        sleep(5)

    def neville_try(self):
        sleep(3)
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        profiles = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[2]/div/div/div/div[2]/ul')
        profiles = profiles.find_elements_by_css_selector('li')
        profiles = [(i.text, i.find_element_by_xpath('//*[@data-control-name="entity_result"]').get_attribute('href')) for i in profiles]
        print("\n\n")
        info_ls = []
        for profile, link in profiles:
            info_ls.append((profile.split('\n'), link))
        for iteam, link in info_ls:
            if 'Learn more' in iteam:
                info_ls.remove(iteam)
        print(info_ls)
        info_ls = [(iteam, link) for iteam, link in info_ls if iteam != ['']]
        for info, link in info_ls:
            if info[0] == info[1]:
                info.remove(info[1])
            try:
                name = info[0]
                degree_connection = info[2]
                zawod = info[3]
                region = info[4]
                opis = info[5]
                opis_f = opis.replace(",", " ")
                list_of_user_data = [name, zawod, opis_f]
                for data in list_of_user_data:
                    try:
                        comp = re.findall('at ([a-zA-Z0-9]+)', data)
                        firma = comp[0]
                        break
                    except:
                        continue
                if comp == []:
                    firma = "brak_danych"
                self.people_ls_dic.append({"name": name, "degree_connection": degree_connection, "zawod": zawod, "region": region, "opis": opis, "firma": firma, "link": link})
            except:
                pass

    def go_home(self):
        home = self.driver.find_element_by_xpath('//*[@id="inbug-nav-item"]/a')
        home.click()

    def next_page(self):
        sleep(3)
        next_p = self.driver.find_element_by_xpath('//*[@aria-label="Next"]')
        next_p.click()

    def open_people(self):
        self.driver.get("https://www.linkedin.com/search/results/people/?origin=DISCOVER_FROM_SEARCH_HOME")
        sleep(2)
        search_bar = self.driver.find_element_by_xpath('//*[@class="search-global-typeahead__input always-show-placeholder"]')
        search_bar.send_keys(parameters.search_query)
        search_bar.send_keys(Keys.ENTER)
        sleep(3)

    def filter_company(self):
        cl = self.driver.find_element_by_xpath('//*[@aria-label="Current company filter. Clicking this button displays all Current company filter options."]')
        cl.click()
        for comp in parameters.list_of_comp:
            text = self.driver.find_element_by_xpath('//*[@placeholder="Add a company"]')
            text.send_keys(comp)
            sleep(1)
            filt = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]/nav/div/div[1]/div/div[2]/ul/li[5]/div/div/div/div[1]/div/form/fieldset/div[1]/div/div/div[2]/div/div[2]')
            sleep(0.2)
            filt.click()
            sleep(1)
        apply = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]/nav/div/div[1]/div/div[2]/ul/li[5]/div/div/div/div[1]/div/form/fieldset/div[2]/button[2]')
        apply.click()
        sleep(1)

    def close(self):
        self.driver.close()

    def write_to_csv(self):
        csv_file = "neville.csv"
        with open(csv_file, 'w', encoding="utf-8", newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=self.csv_name_colums)
            writer.writeheader()
            for data in self.people_ls_dic:
                writer.writerow(data)

scrypt = LinkedIn()
scrypt.login()
scrypt.open_people()
ls = range(parameters.ilosc_stron)
scrypt.filter_company()
for i in sorted(ls, reverse=True):
    scrypt.neville_try()
    if i == 1:
        break
    scrypt.next_page()
scrypt.write_to_csv()
scrypt.close()
Of course I have a file with parameters, and it looks like this:
linkedin_username = ""
linkedin_password = ""
search_query = 'vcloud director'
list_of_comp = ['Microsoft']
ilosc_stron = 2  # number of pages to click through
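A likely cause of the broken links is the XPath inside neville_try: an expression that starts with // searches from the document root even when find_element_by_xpath is called on an individual list item, so every row can come back with the same first match on the page. Prefixing the expression with a dot makes it relative to the element. A minimal sketch of the corrected extraction, assuming the same container XPath as in the question and an already-logged-in driver:

from selenium import webdriver

driver = webdriver.Chrome()
# ... log in and open the people-search results as in the script above ...
container = driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[2]/div/div/div/div[2]/ul')
links = []
for row in container.find_elements_by_css_selector('li'):
    # The leading dot scopes the search to this <li>; without it,
    # '//*' matches against the whole document every time.
    anchor = row.find_element_by_xpath('.//*[@data-control-name="entity_result"]')
    links.append(anchor.get_attribute('href'))
print(links)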

How to prevent my Selenium/Python program from crashing?

I made a program which gets one record from a Google Sheet, processes it, then deletes it, and so on. If I update the Google Sheet, the program picks up the new record in the next loop, processes it, and then deletes it. But it only runs for 1 or 2 hours, and then the program gives an error:
What can I add to my program so that it never stops?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import traceback
import string
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium.common.exceptions import NoAlertPresentException
from selenium.common.exceptions import UnexpectedAlertPresentException

Email = raw_input('Please Enter your Email: ')
password = raw_input('Please Enter Password: ')
print("\n******Don't Interrupt the Script******")
print('#script is Runing............\n')
chrome_options = webdriver.ChromeOptions()  # going to chrome options
chrome_options.add_argument("--start-maximized")
prefs = {"profile.default_content_setting_values.notifications": 2,  # turn off all notifications
         "profile.managed_default_content_settings.images": 2}  # disable images
chrome_options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(chrome_options=chrome_options)  # passing parameters to chrome
driver.get('https://accounts.google.com')
time.sleep(3)
# giving Email-------------------
email = driver.find_element_by_id('Email')
email.send_keys(Email, Keys.RETURN)
# giving password----------------
time.sleep(3)
email = driver.find_element_by_id('Passwd')
email.send_keys(password, Keys.RETURN)
# credentials + attach with googleSheet------------------------------
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name('stephens-31d8490b5bd2.json', scope)
google_sheet = gspread.authorize(credentials)
workSheet = google_sheet.open("Video Access Master Sheet").worksheet("Sheet1")
while True:
    # fetch records from rows 2 to 50 and save them in lists-----------------
    for i in range(2, 51):
        li_url = []
        li_email = []
        row = workSheet.row_values(i)
        for b in row:
            if 'youtu' in b:
                li_url.append(b)
                # find the record that was appended to the list, then delete it from the sheet--------------------
                cell = workSheet.find(b)
                row = cell.row
                col = cell.col
                workSheet.update_cell(row, col, '')
                print 'Fetching Values From Row ' + str(i) + '....'
            elif '#' in b:
                li_email.append(b)
            elif b == '':
                continue
            else:
                continue
        # *********************************************************
        # get the length of li_url and branch on it-----------------------------------------------
        length = len(li_url)
        if length == 0:
            continue
        else:
            try:
                # take URLs from the list and pass them to driver.get---------------------------------------------------------
                for a in li_url:
                    driver.get(a)
                    time.sleep(3)
                    driver.find_element_by_css_selector('.yt-uix-button-icon.yt-uix-button-icon-info.yt-sprite').click()
                    time.sleep(3)
                    driver.find_element_by_css_selector('.yt-uix-button.yt-uix-button-size-default.yt-uix-button-default.metadata-share-button').click()
                    time.sleep(2)
                    put_email = driver.find_element_by_css_selector('.yt-uix-form-input-textarea.metadata-share-contacts')
                    # take emails from the email list--------------------------------------------------------------
                    put_email.send_keys(li_email[0])
                    time.sleep(2)
                    driver.find_element_by_css_selector('.yt-uix-button.yt-uix-button-size-default.yt-uix-button-primary.sharing-dialog-button.sharing-dialog-ok').click()
                    time.sleep(4)
                    driver.find_element_by_xpath('.//*[@id="video-header"]/div/button[2]/span').click()
                    time.sleep(10)
                    # for notifications and alerts--------------------------------------------
                    try:
                        driver.switch_to.alert.accept()
                    except NoAlertPresentException:
                        pass
                    except UnexpectedAlertPresentException:
                        pass
            except:
                traceback.print_exc
                pass
            print 'Row ' + str(i) + ' Successfully Updated. \n'
    time.sleep(120)  # while loop sleeps for 2 minutes between passes
This is the error I got:
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "<string>", line 56, in parse
File "<string>", line 35, in parse
cElementTree.ParseError: no element found: line 1, column 0
For some reason cell = workSheet.find(b) fails. Could be bad data in there; without seeing the input it's anyone's guess.
Since you already know the row number, you can avoid using cell = workSheet.find(b) by simply keeping track of the columns you're searching through and finally calling workSheet.update_cell(i, col, '') after copying the data.
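A rough sketch of that suggestion: track the column index while scanning the row so workSheet.find is never needed, and wrap each row in a try/except so a transient API or parse error does not kill the long-running loop (workSheet is assumed to be authorized as in the question):

import time
import traceback

for i in range(2, 51):
    try:
        li_url = []
        li_email = []
        row = workSheet.row_values(i)
        for col, value in enumerate(row, start=1):  # gspread columns are 1-based
            if 'youtu' in value:
                li_url.append(value)
                # Row i and column col are already known, so no find() call
                workSheet.update_cell(i, col, '')
            elif '#' in value:
                li_email.append(value)
        # ... process li_url / li_email with Selenium as in the script above ...
    except Exception:
        traceback.print_exc()  # note the (): bare traceback.print_exc does nothing
        time.sleep(30)         # back off briefly, then carry on with the next row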
