How can I add threading to my Python code?

Below is my attempt to create a username availability checker with proxies. So far it works as intended; the only problem is that it's slow. I tried to implement threads, but it made no difference, as I'm not sure whether I'm doing it right.
I used the concurrent.futures and threading libraries.
Is there a better way to write this kind of program, or are there any other suggestions?
Thanks in advance
import requests
import json
import ctypes
import colorama
from colorama import Fore
from datetime import datetime
import os

os.system("cls")
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
colorama.init()

url = "https://link"
headers = {}  # the actual request headers were not shown in the post

def grab_proxies():
    proxylist = []
    with open('proxy.txt', 'r') as prx:
        for proxy in prx.readlines():
            proxylist.append(proxy.rstrip("\n"))
    return proxylist

prlist = grab_proxies()

def grab_usernames():
    userlist = []
    with open('userlist.txt', 'r') as users:
        for user in users.readlines():
            userlist.append(user.rstrip("\n"))
    return userlist

ulist = grab_usernames()

found = 0
pc = 0
uc = 0

for i in range(0, len(prlist)):
    ctypes.windll.kernel32.SetConsoleTitleW(
        "[# Checker] | Counter: %s - Found: %s - Current Proxy: %s - Started at: %s"
        % (i, found, prlist[pc], current_time))
    try:
        req = requests.post(url, headers=headers,
                            data={"requested_username": ulist[uc], "xsrf_token": "F0kpyvjJgeBtsOk5Gl6Jvg"},
                            proxies={'http': prlist[pc], 'https': prlist[pc]}, timeout=2)
        response = req.json()
        if response['reference']['status_code'] == 'TAKEN':
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Taken{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            uc += 1
        elif response['reference']['status_code'] == 'OK':
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTGREEN_EX}Available{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            with open("found.txt", "a") as f:
                f.write(ulist[uc] + "\n")
            found += 1
            uc += 1
        elif response['reference']['status_code'] == 'INVALID_BEGIN':
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Invalid Username{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            uc += 1
        elif response['reference']['status_code'] == 'DELETED':
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Deleted{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            uc += 1
        else:
            print(response)
    except Exception:
        # proxy failed or timed out, move on to the next proxy
        pc += 1

x = input("Finished!.. press enter to exit")

You could use https://github.com/encode/requests-async to make your requests asynchronously.
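For example, here is a minimal sketch of that idea, assuming requests-async is installed (pip install requests-async) and reusing the endpoint and form field from the question; proxies, the xsrf token, and error handling are left out for brevity. (The requests-async project has since been superseded by httpx, which offers a very similar async API.)

# Sketch only: requests-async mirrors the requests API but returns awaitables,
# so many username checks can be in flight at once.
import asyncio
import requests_async as requests

URL = "https://link"  # placeholder endpoint from the question

async def check(username):
    resp = await requests.post(URL, data={"requested_username": username})
    return username, resp.json()["reference"]["status_code"]

async def main(usernames):
    # run all checks concurrently instead of one request at a time
    results = await asyncio.gather(*(check(u) for u in usernames))
    for username, status in results:
        print(username, status)

asyncio.run(main(["user1", "user2", "user3"]))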


Two different while loops run simultaneously in Tkinter (python)

I have made a simple chat system with python-requests. There are two files: one is the sender and the other is the receiver. The main concept of the two files is:
1. the sender file contains a while loop which always takes a message as input; after the message is entered, it sends the message to a website.
2. the receiver file also contains a while loop which fetches new messages from the website every 5 seconds.
Now I want to run these two tasks in the same window with Tkinter. How do I do that? Thanks in advance.
Sender.py Code is here
import configme as con
import requests
import datetime
from cryptography.fernet import Fernet

nam = con.my_name
cookies_dict = con.cookie
key = con.crypto_key
url = con.base_url + '/config.php'

def makeID():
    return datetime.datetime.now().timestamp()

# encription staff
fernet = Fernet(key)

# member joining message
if nam.__len__() != 0:
    requests.get(url + f"?iD={makeID()}&name=<<<>>>&msg={nam} join the room.", cookies=cookies_dict)

with requests.Session() as r:
    while True:
        msg = input("Enter your Messege: ")
        if msg == ".exit":
            # r.get(url + f"?iD={makeID()}&name=<<<>>>&msg={nam} has left the room.", cookies=cookies_dict)
            break
        else:
            encMessage = fernet.encrypt(msg.encode())
            messenger = {'iD': makeID(), 'name': nam, 'msg': encMessage}
            if msg != "":
                r.get(url, params=messenger, cookies=cookies_dict)
Receiver.py code here...
import configme as con
import requests
import json
from cryptography.fernet import Fernet
from time import sleep
from datetime import datetime
from pytz import timezone
import pytz

cookies_dict = con.cookie
ozone = con.my_timezone
key = con.crypto_key
time_format = con.date_time_format
url = con.base_url + '/log.json'
t = con.receive_time

# encription staff
fernet = Fernet(key)
timezone = timezone(ozone)

def setTime(t):
    stamptime = int(float(t))
    GMT0 = pytz.utc.localize(datetime.utcfromtimestamp(stamptime))
    return GMT0.astimezone(timezone).strftime(time_format)

j = 0
while True:
    r = requests.get(url, cookies=cookies_dict).text
    message = json.loads(r)
    message_sz = len(message)
    if message_sz == 0:
        print("Looks like there are no message")
        break
    for msg in message[j:]:
        local_time = setTime(msg['id'])
        if msg['nam'] == '<<<>>>':
            print(f"{local_time} :: {msg['nam']} :: {msg['msg']}")
        else:
            decMessage = fernet.decrypt(bytes(msg['msg'], "utf-8")).decode()
            print(f"{local_time} :: {msg['nam']} :: {decMessage}")
    j = message_sz
    sleep(t)
I would not suggest this method of repeatedly polling the website, but you could run the two while loops in threads so they execute at the same time, and update Tk whenever you want using tk.update().
You could then read the data that the threaded loops store in shared variables and use it in your single Tk window.
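For example, here is a minimal sketch of that approach. The receiver_loop below is a hypothetical stand-in for the real request loop in Receiver.py, and the Tk window simply polls a shared list once a second with after():

# Sketch: a daemon thread fills a shared list, the Tk main loop drains it.
import threading
import time
import tkinter as tk

latest_messages = []          # shared state written by the background thread
lock = threading.Lock()

def receiver_loop():
    while True:
        # in the real script this would be requests.get(...) on the chat log
        with lock:
            latest_messages.append("message received at " + time.strftime("%H:%M:%S"))
        time.sleep(5)

root = tk.Tk()
text = tk.Text(root)
text.pack()

def refresh():
    with lock:
        msgs = list(latest_messages)
        latest_messages.clear()
    for m in msgs:
        text.insert(tk.END, m + "\n")
    root.after(1000, refresh)  # poll the shared data once a second

threading.Thread(target=receiver_loop, daemon=True).start()
refresh()
root.mainloop()

The sender loop, which blocks on input(), could be replaced by a Tk Entry widget and a button callback rather than a second thread.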
Use multithreading, or else load the data separately.

Search haveibeenpwned for all emails on a domain

I am able to use haveibeenpwned to search for one compromised account. However, I could not find an option to use the API key to search for compromises of all the email accounts on a domain (for example, if the domain is xyz.com, I want to search for compromises of abc@xyz.com, peter.charlie@xyz.com, and so on). I am aware of the notification email that I can sign up for, but that is a lengthy process and I prefer using the API.
So I wrote a script to search haveibeenpwned for all the email addresses of my domain, but it takes very long. I searched through a couple of GitHub projects, but I did not find any such implementation. Has anyone tried this before?
I have added the code below. I am using a multithreading approach, but it still takes very long; is there any other optimization strategy I can use? Please help. Thank you.
import requests, json
import threading
from time import sleep
import datetime
import splunklib.client as client
import splunklib.results as results
date = datetime.datetime.now()
from itertools import islice
import linecache
import sys

def PrintException():
    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    print 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)

class myThread (threading.Thread):
    def __init__(self, threadID, name, list_emails):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.list_emails = list_emails

    def run(self):
        i = 0
        print "Starting " + self.name
        for email in self.list_emails:
            print i
            i = i + 1
            result = check_pasteaccount(email)
            print email
            print result
        print "Exiting " + self.name

def check_pasteaccount(account):
    account = str(account)
    result = ""
    URL = "https://haveibeenpwned.com/api/v3/pasteaccount/%s?truncateResponse=false" % (account)
    headers = {'hibp-api-key': api_key}
    try:
        r = requests.get(url=URL, headers=headers)
        status_code = r.status_code
        if status_code == 200:
            data = r.text
            result = []
            for entry in json.loads(data.decode('utf8')):
                if int((date - datetime.datetime.strptime(entry['Date'], '%Y-%m-%dT%H:%M:%SZ')).days) > 120:
                    pass
                else:
                    result.append(['Title: {0}'.format(entry['Title']),
                                   'Source: {0}'.format(entry['Source']),
                                   'Paste ID: {0}'.format(entry['Id'])])
            if len(result) == 0:
                result = "No paste reported for given account and time frame."
            else:
                paste_result = ""
                for entry in result:
                    for item in entry:
                        paste_result += str(item) + "\r\n"
                    paste_result += "\r\n"
                result = paste_result
        elif status_code == 404:
            result = "No paste for the account"
        else:
            if status_code == 429:
                # Limit exceeded, sleep and retry
                sleep(5)
                result = check_pasteaccount(account)
            else:
                result = "Exception"
                print status_code
    except Exception as e:
        result = "Exception"
        PrintException()
    return result

def split_every(n, iterable):
    iterable = iter(iterable)
    for chunk in iter(lambda: list(islice(iterable, n)), []):
        yield chunk

def main():
    print datetime.datetime.now()
    # Fetching the list of email addresses from Splunk
    list_emails = connect_splunk()
    print datetime.datetime.now()
    i = 0
    list_split = split_every(1000, list_emails)
    threads = []
    for list in list_split:
        i = i + 1
        thread_name = "Thread" + str(i)
        thread = myThread(1, thread_name, list)
        thread.start()
        threads.append(thread)
    # Wait for all the threads to complete
    for t in threads:
        t.join()
    print "Completed Search"
Here's a shorter and maybe more efficient version of your script using the standard multiprocessing library instead of a hand-rolled thread system.
- You'll need Python 3.6+ since we're using f-strings.
- You'll need to install the tqdm module for fancy progress bars.
- You can adjust the number of concurrent requests with the pool size parameter.
- Output is written in machine-readable JSON Lines format into a timestamped file.
- A single requests session is shared (per worker), which means less time spent connecting to HIBP.
import datetime
import json
import multiprocessing
import random
import time

import requests
import tqdm

HIBP_PARAMS = {
    "truncateResponse": "false",
}
HIBP_HEADERS = {
    "hibp-api-key": "xxx",
}

sess = requests.Session()

def check_pasteaccount(account):
    while True:
        resp = sess.get(
            url=f"https://haveibeenpwned.com/api/v3/pasteaccount/{account}",
            params=HIBP_PARAMS,
            headers=HIBP_HEADERS,
        )
        if resp.status_code == 429:
            print("Quota exceeded, waiting for a while")
            time.sleep(random.uniform(3, 7))
            continue
        if resp.status_code >= 400:
            return {
                "account": account,
                "status": resp.status_code,
                "result": resp.text,
            }
        return {
            "account": account,
            "status": resp.status_code,
            "result": resp.json(),
        }

def connect_splunk():
    # TODO: return emails
    return []

def main():
    list_emails = [str(account) for account in connect_splunk()]
    datestamp = datetime.datetime.now().isoformat().replace(":", "-")
    output_filename = f"accounts-log-{datestamp}.jsonl"
    print(f"Accounts to look up: {len(list_emails)}")
    print(f"Output filename: {output_filename}")
    with multiprocessing.Pool(processes=16) as p:
        with open(output_filename, "a") as f:
            results_iterable = p.imap_unordered(
                check_pasteaccount, list_emails, chunksize=20
            )
            for result in tqdm.tqdm(
                results_iterable,
                total=len(list_emails),
                unit="acc",
                unit_scale=True,
            ):
                print(json.dumps(result, sort_keys=True), file=f)

if __name__ == "__main__":
    main()

Youtube V3 API doesn't sort video

So I have been using the YouTube API to scrape a channel. Everything was working fine until three days ago (03/15/2019), when the results stopped being sorted. It seems that no matter what I put in the order parameter, the results are all the same. Can anyone tell me why it isn't working? Here's the code snippet:
import re
import os
import json
import MySQLdb
from pytube import YouTube
import urllib
import isodate
import sys

def get_all_video_in_channel(channel_id):
    api_key = '<MY KEY>'
    video_url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails&id={}&key={}'
    first_url = 'https://www.googleapis.com/youtube/v3/search?key={}&channelId={}&part=snippet,id&order=date&maxResults=50'.format(api_key, channel_id)  # order by date but won't work
    res = []
    url = first_url
    while True:
        inp = urllib.urlopen(url)
        resp = json.load(inp)
        vidIds = []
        for jobject in resp['items']:
            if jobject['id']['kind'] == "youtube#video":
                vidIds.append(jobject['id']['videoId'])
        vidreq = urllib.urlopen(video_url.format(",".join(vidIds), api_key))
        vidres = json.load(vidreq)
        for vidjson in vidres['items']:
            res.append(vidjson)
        if len(res) >= 50:
            break
        try:
            next_page_token = resp['nextPageToken']
            url = first_url + '&pageToken={}'.format(next_page_token)
        except:
            break
    return res

c_id = 'UCycyxZMoPwg9cuRDMyQE7PQ'
episodes = get_all_video_in_channel(c_id)
Edit: I did some more research, and people say the API is indeed not working properly because of YouTube's handling of the New Zealand shooting videos, and that it should be working properly again soon.
I recommend reading the answer at https://stackoverflow.com/a/55220182/8327971. This is a known and acknowledged issue by Google: https://issuetracker.google.com/issues/128673552.

Best way to make thousands of get requests in python

Right now I am working on a Python script which takes in a list of URLs as an argument, performs a GET request on each URL, and then searches through the output with XPath to fingerprint the website. It seems to work like a charm when the list is around 50 sites long, but anything beyond that causes the program to slow down to the point where it stops (usually around 150 sites). Scroll down to where you see the main app logic; the relevant code is below. Right now I am just using 50 elements in the array and it works fine, but anything more makes the entire program stop. Any suggestions would be greatly appreciated!
#!/usr/bin/python
# Web Scraper
# 1.0

# Imports for file
from multiprocessing.dummy import Pool as ThreadPool
from threading import Thread
from Queue import Queue
from lxml import html
import requests
import time
import sys

# Get Raw HTML
def scrape(url):
    try:
        page = requests.get(url, timeout=2.0)
        if page.status_code == requests.codes.ok:
            html_page = html.fromstring(page.content)
            s = requests.session()
            s.close()
            return html_page
        else:
            s = requests.session()
            s.close()
            return False
    except:
        s = requests.session()
        s.close()
        return False

# Format URL
def format_url(url):
    if url.find("http://") == -1:
        url = "http://" + url
    if url[-1] == "/":
        url = url[:-1]
    return url

# Check if WordPress Site
def check_wordpress(tree):
    scripts = tree.xpath("//script[contains(@src,'wp-content')]")
    if len(scripts) > 0:
        return True
    return False

# Check WordPress Version
def wordpress_version(tree):
    type = tree.xpath("//meta[@name='generator']/@content")
    version = 0
    if len(type) > 0:
        details = type[0].split()
        if len(details) > 1 and details[0] == "WordPress":
            if len(details) > 1:
                version = details[1]
        else:
            version = type[0]
    return version

# Find Contact Page
def find_contact_page(tree):
    contact = tree.xpath("//a[contains(text(),'Contact')]/@href")
    try_xpath = 1
    while len(contact) == 0:
        if try_xpath == 1:
            contact = tree.xpath("//span[contains(text(),'Contact')]/../@href")
        elif try_xpath == 2:
            contact = tree.xpath("//p[contains(text(),'Contact')]/../@href")
        elif try_xpath == 3:
            break
        try_xpath += 1
    if len(contact) > 0:
        contact = contact[0]
        if contact.find('#') == -1:
            if contact[0] == '/':
                contact = url + "" + contact
            print contact

# Juicer method
def juice(url):
    url = format_url(url)
    string = url
    tree = scrape(url)
    if tree == False:
        return string + " \t\t\t No XML tree"
    elif check_wordpress(tree) == True:
        version = wordpress_version(tree)
        return string + " \t\t\t WordPress: " + str(version)
    else:
        return string + " \t\t\t Not WordPress"

# Main App Logic Below ------------------------------------->
# Open list of websites from given argument
list = open(sys.argv[1], 'r').read().split('\n')

# Juice url
def juice_url():
    while True:
        url = q.get()
        result = juice(url)
        print result
        q.task_done()

# Create concurrent queues
concurrent = 50
q = Queue(concurrent)
for i in range(concurrent):
    t = Thread(target=juice_url)
    t.daemon = True
    t.start()

# Add URL to Queue
time1 = time.time()
for url in list[0:50]:
    q.put(url)
q.join()

# Calculate total time
total = time.time() - time1
print "Total Time: %f" % total
print "Average Time: %f" % (total/50)

How to write JSON from response to file? (PYTHON)

So I'm trying to write the JSON from the Kik smiley site to a file so I won't have to write it manually. I need to parse the JSON so that only some of it ends up in the file (basically cleaning it); what I need from the site is (name, id, type). How would I do this?
I have written this in Python, but it seems to fail, and I'm not 100% sure why. I am new to Python, so sorry if this is an obvious question! I did find something earlier, but it just confused me even more :) Thank you!
import requests, json, sys
from colorama import init
from termcolor import colored

# colorama
init()

class SmileyGrabber():
    def __init__(self):
        # requests vars
        self.smileysFound = 0
        self.smileysLost = 0
        self.url = "https://sticker-service.appspot.com/v2/collection/smiley"
        self.session = requests.Session()
        self.grabSmiley()

    def grabSmiley(self):
        while True:
            try:
                r = self.session.get(self.url)
                j = r.json()
                try:
                    if j["IsSuccess"] == True:
                        meta = j["smileys"]
                        sID = meta["id"]
                        sType = meta["type"]
                        sName = meta["name"]
                        FormatSmileyData(sID, sType, sName)
                        print "Smiley Found:", colored("({0})".format(sName), "cyan")
                        self.smileysFound += 1
                    else:
                        print(colored("Could not grab smiley"), "red")
                        self.smileysLost += 1
                except:
                    sys.exit()
            except KeyboardInterrupt:
                sys.exit()
            except:
                print r.text
                sys.exit()

class FormatSmileyData(object):
    def __init__(self, sID, sType, sName):
        smileyData = {}
        data = []
        data.append({"SMILEY_ID": sID, "SMILEY_TYPE": sType, "SMILEY_NAME": sName})
        dataFile = open("smileys.json", "a+")
        dataFile.write(json.dumps(smileyData) + "\n")
        dataFile.close()

if __name__ == "__main__":
    SmileyGrabber()
There are a number of problems with your code:
- It will be more efficient to read from the network all at once rather than making a call to session.get for each smiley.
- j does not have an "IsSuccess" element, so that will never be true.
- j["smileys"] is a list, so to get the dictionaries (which represent each smiley) you will need to iterate through that list.
- You are appending data into data, but you are writing from smileyData, which never has any data entered into it.
- Each time you call the FormatSmileyData constructor, you are resetting the data.
Take a look at a tool like Postman to prettify the JSON so you can see the structure; this can help you figure out how to parse it.
Here's an updated version of your script that appears to work:
I removed the colorization and made it work with Python 3.
import requests, json, sys

class SmileyGrabber():
    def __init__(self):
        # requests vars
        self.smileysFound = 0
        self.smileysLost = 0
        self.url = "https://sticker-service.appspot.com/v2/collection/smiley"
        self.session = requests.Session()
        self.data = []
        self.grabSmiley()
        self.writeSmileyData()

    def grabSmiley(self):
        r = self.session.get(self.url)
        j = r.json()
        print("got json")
        print(str(len(j)))
        for element in j:
            for meta in element["smileys"]:
                print("---------------")
                print(str(meta))
                sID = meta["id"]
                sType = meta["type"]
                sName = meta["name"]
                self.addSmileyData(sID, sType, sName)
                print("Smiley Found:" + "({0})".format(sName))
                self.smileysFound += 1
                print("found " + str(self.smileysFound))
            else:
                print("Could not grab smiley")
                self.smileysLost += 1

    def addSmileyData(self, sID, sType, sName):
        self.data.append({"SMILEY_ID": sID, "SMILEY_TYPE": sType, "SMILEY_NAME": sName})

    def writeSmileyData(self):
        dataFile = open("smileys.json", "a+")
        dataFile.write(json.dumps(self.data) + "\n")
        dataFile.close()

if __name__ == "__main__":
    SmileyGrabber()
