In Python 3.6.4, what is a good approach for starting a new separate process or thread on every click of a button? I have written this code but it's not working the way I want it to.
from multiprocessing import process
import requests
import threading
from tkinter import *

def download():
    name=entry2.get()
    url=entry1.get()
    r = requests.head(url)
    if name:
        file_name = name
    else:
        file_name = url.split('/')[-1]
    try:
        file_size = int(r.headers['content-length'])
        part=file_size/4
        start=0
        end=part
    except:
        print ("Invalid URL")
        return
    print ('%s downloaded' % file_name)

def thread(url):
    file_name=entry2.get()
    r=requests.get(url)
    data=r.content
    with open('file_name','rb+') as fp:
        data1=fp.read()
    with open('file_name',"wb+") as fp:
        data1=fp.write(data)
    print("its working3")

if __name__=='__main__':
    p=process(target=download,args=())
    p.start()
    p.join()

root=Tk()
frame=Frame(root,width=500,height=450,bg="lightpink")
url1=Label(frame,text="enter url here")
name=Label(frame,text="enter the name of the file")
url1.grid(row=0,sticky=E)
name.grid(row=1,sticky=E)
entry1=Entry(frame)
entry2=Entry(frame)
entry1.grid(row=0,column=1)
entry2.grid(row=1,column=1)
button1=Button(frame,text="download", command=download)
button1.grid(row=2,column=0)
button3=Button(frame,text="quit",command=frame.quit)
button3.grid(row=2,column=1)
frame.grid()
print("its working4")
root.mainloop()
Does this do the job? It uses the threading module rather than multiprocessing:
#from multiprocessing import process
from threading import Thread as process
import requests
import threading
from tkinter import *

def download():
    name=entry2.get()
    url=entry1.get()
    r = requests.head(url)
    if name:
        file_name = name
    else:
        file_name = url.split('/')[-1]
    try:
        file_size = int(r.headers['content-length'])
        part=file_size/4
        start=0
        end=part
    except:
        print ("Invalid URL")
        return
    print ('%s downloaded' % file_name)

def thread(url):
    file_name=entry2.get()
    r=requests.get(url)
    data=r.content
    with open('file_name','rb+') as fp:
        data1=fp.read()
    with open('file_name',"wb+") as fp:
        data1=fp.write(data)
    print("its working3")

root=Tk()
frame=Frame(root,width=500,height=450,bg="lightpink")
url1=Label(frame,text="enter url here")
name=Label(frame,text="enter the name of the file")
url1.grid(row=0,sticky=E)
name.grid(row=1,sticky=E)
entry1=Entry(frame)
entry2=Entry(frame)
entry1.grid(row=0,column=1)
entry2.grid(row=1,column=1)
button1=Button(frame,text="download", command=lambda: process(target=download).start())
button1.grid(row=2,column=0)
button3=Button(frame,text="quit",command=root.destroy)
button3.grid(row=2,column=1)
frame.grid()
print("its working4")
root.mainloop()
Related
I am using Python 3 and mcstatus.
My source code is:
from mcstatus import JavaServer
import concurrent.futures
import json

imput_file_name = "dockerformcstatus\dockerMyScript\pyscript\input.txt"

def scaner(data):
    print("Processing data: ", data)
    global serverJsonData
    global timeout
    server = JavaServer.lookup(data,timeout)
    print(server)
    try:
        serverJson = server.status().raw
        print(serverJson)
        serverJsonData.append(serverJson)
    except:
        print("can't connect")

if __name__ == "__main__":
    serverJsonData = []
    timeout = 5
    data = None
    with open(imput_file_name,"r") as f:
        data = f.readlines()
    #print(data)
    with concurrent.futures.ProcessPoolExecutor() as executor:
        executor.map(scaner,data)
    #scaner("168.119.4.46:25619")
    with open("serverdata.json", "w") as f:
        f.write(json.dumps(serverJsonData))
The problem is that if I use the ProcessPoolExecutor (with concurrent.futures.ProcessPoolExecutor() as executor: executor.map(scaner, data)), nothing gets written into the list "serverJsonData", but if I run it directly as scaner("168.119.4.46:25619") it works.
I tried to debug it but could not find the reason for this "bug".
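For what it's worth, the likely cause (my reading, not something confirmed in the thread) is that ProcessPoolExecutor runs scaner in separate worker processes, each with its own copy of the module globals, so serverJsonData.append(...) happens in the children and never reaches the parent process; the direct call works because it runs in the same process. A minimal sketch, reusing the JavaServer.lookup and status().raw calls from the question (the input path is a placeholder), that returns the result from each worker and collects the values executor.map sends back instead:

from mcstatus import JavaServer
import concurrent.futures
import json

TIMEOUT = 5  # same timeout value as in the question

def scaner(address):
    # Runs in a worker process: return the status rather than appending to a
    # global list, because each worker only sees its own copy of the globals.
    try:
        server = JavaServer.lookup(address.strip(), TIMEOUT)
        return server.status().raw
    except Exception:
        print("can't connect:", address.strip())
        return None

if __name__ == "__main__":
    with open("input.txt") as f:  # placeholder path
        addresses = f.readlines()

    with concurrent.futures.ProcessPoolExecutor() as executor:
        # executor.map returns each worker's return value to the parent process.
        serverJsonData = [r for r in executor.map(scaner, addresses) if r is not None]

    with open("serverdata.json", "w") as f:
        f.write(json.dumps(serverJsonData))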
Below is my attempt at creating a username availability checker with proxies; so far it works as intended.
The only thing is that it's slow. I tried to implement threads, but it made no difference, as I'm not sure whether I'm doing it right or not.
I used the concurrent.futures and threading libraries.
Is there a better way to code this kind of program, or are there any other suggestions?
Thanks in advance
import requests
import json
import ctypes
import colorama
from colorama import Fore
from datetime import datetime
import os

os.system("cls")
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
colorama.init()
url = "https://link"

def grab_proxies():
    proxylist = []
    prx = open('proxy.txt','r')
    prx = prx.readlines()
    for proxy in prx:
        proxy = proxy.rstrip("\n")
        proxylist.append(proxy)
    return proxylist

prlist = grab_proxies()

def grab_usernames():
    userlist = []
    users = open('userlist.txt','r')
    users = users.readlines()
    for user in users:
        user = user.rstrip("\n")
        userlist.append(user)
    return userlist

ulist = grab_usernames()
found = 0
pc = 0
uc = 0

for i in range(0,len(prlist)):
    ctypes.windll.kernel32.SetConsoleTitleW(f"[# Checker] | Counter: %s - Found: %s - Current Proxy: %s - Started at: %s" % (i, found, prlist[pc], current_time))
    try:
        req = requests.post(url,headers=headers, data = {"requested_username": ulist[uc], "xsrf_token": "F0kpyvjJgeBtsOk5Gl6Jvg"},proxies={'http' : prlist[pc],'https': prlist[pc]}, timeout=2)
        response = req.json()
        #print(response,req.status_code)
        #print(response)
        #print(type(response))
        if(response['reference']['status_code'] == 'TAKEN'):
            #rd = response['errors']['username'][0]['code']
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Taken{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            #print(ulist[uc]+" Taken")
            uc+=1
        elif(response['reference']['status_code'] == 'OK'):
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTGREEN_EX}Available{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            #print(ulist[uc]+" Available")
            f = open("found.txt","a")
            f.write(ulist[uc]+"\n")
            f.close()
            found+=1
            uc+=1
        elif(response['reference']['status_code'] == 'INVALID_BEGIN'):
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Invalid Username{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            uc+=1
        elif(response['reference']['status_code'] == 'DELETED'):
            print(f'{Fore.LIGHTBLACK_EX}[{Fore.LIGHTRED_EX}Deleted{Fore.LIGHTBLACK_EX}]{Fore.LIGHTCYAN_EX} {ulist[uc]}')
            uc+=1
        else:
            print(response)
    except:
        #print(prlist[pc]+ " Going to next proxy")
        pc+=1
        pass
        #break

x = input("Finished!.. press enter to exit")
You could use https://github.com/encode/requests-async to do your requests in an async way
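A minimal sketch of what that could look like, assuming requests-async installs as the requests_async module and mirrors the requests API with awaitable calls (as its README describes); the URL list here is a placeholder:

import asyncio
import requests_async  # pip install requests-async

async def check(url):
    # Awaiting here lets the other requests run while this one waits on the network.
    response = await requests_async.get(url)
    return url, response.status_code

async def main(urls):
    # Issue all requests concurrently and gather the results.
    results = await asyncio.gather(*(check(url) for url in urls))
    for url, status in results:
        print(url, status)

if __name__ == "__main__":
    asyncio.run(main(["https://example.org"]))  # placeholder URLs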
I am able to use haveibeenpwned to search for the compromise of a single account. However, I could not find an option to use the API key to search for the compromise of all the email accounts on a domain. (For example, if the domain is xyz.com, I want to search for the compromise of abc@xyz.com, peter.charlie@xyz.com, and so on.) I am aware of the notification email that I can sign up for, but that is a lengthy process and I prefer using the API.
So I wrote a script to search haveibeenpwned for all the email addresses of my domain, but it takes very long. I searched through a couple of GitHub projects, but I did not find any such implementation. Has anyone tried this before?
I have added the code below. I am using a multi-threading approach, but it still takes very long. Is there any other optimization strategy I can use? Please help. Thank you.
import requests, json
import threading
from time import sleep
import datetime
import splunklib.client as client
import splunklib.results as results
date = datetime.datetime.now()
from itertools import islice
import linecache
import sys

def PrintException():
    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    print 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)

class myThread (threading.Thread):
    def __init__(self, threadID, name, list_emails):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.list_emails = list_emails

    def run(self):
        i=0
        print "Starting " + self.name
        for email in self.list_emails:
            print i
            i=i+1
            result = check_pasteaccount(email)
            print email
            print result
            print result
        print "Exiting " + self.name

def check_pasteaccount(account):
    account = str(account)
    result = ""
    URL = "https://haveibeenpwned.com/api/v3/pasteaccount/%s?truncateResponse=false" % (account)
    # print(URL)
    headers= {'hibp-api-key':api_key}
    result = ""
    try:
        r = requests.get(url=URL,headers=headers)
        # sleep(2)
        status_code = r.status_code
        if status_code == 200:
            data = r.text
            result = []
            for entry in json.loads(data.decode('utf8')):
                if int((date - datetime.datetime.strptime(entry['Date'], '%Y-%m-%dT%H:%M:%SZ')).days) > 120:
                    pass
                else:
                    result.append(['Title: {0}'.format(entry['Title']), \
                                   'Source: {0}'.format(['Source']), \
                                   'Paste ID: {0}'.format(entry['Id'])])
            if len(result) == 0:
                result = "No paste reported for given account and time frame."
            else:
                paste_result = ""
                for entry in result:
                    for item in entry:
                        paste_result += str(item) + "\r\n"
                    paste_result += "\r\n"
                result = paste_result
        elif status_code == 404:
            result = "No paste for the account"
        else:
            if status_code == 429:
                sleep(5)
                # print "Limit exceeded, sleeping"
                result = check_pasteaccount(account)
            else:
                result = "Exception"
                print status_code
    except Exception as e:
        result = "Exception"
        PrintException()
        pass
    return result

def split_every(n, iterable):
    iterable = iter(iterable)
    for chunk in iter(lambda: list(islice(iterable, n)), []):
        yield chunk

def main():
    print datetime.datetime.now()
    # Fetching the list of email addresses from Splunk
    list_emails = connect_splunk()
    print datetime.datetime.now()
    i=0
    list_split = split_every(1000,list_emails)
    threads=[]
    for list in list_split:
        i=i+1
        thread_name = "Thread" + str(i)
        thread = myThread(1, thread_name, list)
        thread.start()
        threads.append(thread)
    # Wait for all the threads to complete
    for t in threads:
        t.join()
    print "Completed Search"
Here's a shorter and maybe more efficient version of your script using the standard multiprocessing library instead of a hand-rolled thread system.
You'll need Python 3.6+ since we're using f-strings.
You'll need to install the tqdm module for fancy progress bars.
You can adjust the number of concurrent requests with the pool size parameter.
Output is written in machine-readable JSON Lines format into a timestamped file.
A single requests session is shared (per-worker), which means less time spent connecting to HIBP.
import datetime
import json
import multiprocessing
import random
import time

import requests
import tqdm

HIBP_PARAMS = {
    "truncateResponse": "false",
}
HIBP_HEADERS = {
    "hibp-api-key": "xxx",
}

sess = requests.Session()

def check_pasteaccount(account):
    while True:
        resp = sess.get(
            url=f"https://haveibeenpwned.com/api/v3/pasteaccount/{account}",
            params=HIBP_PARAMS,
            headers=HIBP_HEADERS,
        )
        if resp.status_code == 429:
            print("Quota exceeded, waiting for a while")
            time.sleep(random.uniform(3, 7))
            continue
        if resp.status_code >= 400:
            return {
                "account": account,
                "status": resp.status_code,
                "result": resp.text,
            }
        return {
            "account": account,
            "status": resp.status_code,
            "result": resp.json(),
        }

def connect_splunk():
    # TODO: return emails
    return []

def main():
    list_emails = [str(account) for account in connect_splunk()]
    datestamp = datetime.datetime.now().isoformat().replace(":", "-")
    output_filename = f"accounts-log-{datestamp}.jsonl"
    print(f"Accounts to look up: {len(list_emails)}")
    print(f"Output filename: {output_filename}")
    with multiprocessing.Pool(processes=16) as p:
        with open(output_filename, "a") as f:
            results_iterable = p.imap_unordered(
                check_pasteaccount, list_emails, chunksize=20
            )
            for result in tqdm.tqdm(
                results_iterable,
                total=len(list_emails),
                unit="acc",
                unit_scale=True,
            ):
                print(json.dumps(result, sort_keys=True), file=f)

if __name__ == "__main__":
    main()
error: (3, 'Illegal characters found in URL')
My URL has special characters in it, like [AVC_(1)_(P1)_0].
I can't get this to work. I tried encoding the URL, but that gives me "Could not resolve host: https%3A".
Please advise.
import sys
import Queue
import threading
import pycurl
import os
import urllib
from StringIO import StringIO

num_conn = 1

# Make a queue with (url, filename) tuples
queue = Queue.Queue()
with open('list.txt') as f:
    for line in f:
        print line
        queue.put((line, 'test.mp4'))
        if 'str' in line:
            break

# Check args
assert queue.queue, "no URLs given"
num_urls = len(queue.queue)
num_conn = min(num_conn, num_urls)
assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
print "PycURL %s (compiled against 0x%x)" % (pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM)
print "----- Getting", num_urls, "URLs using", num_conn, "connections -----"

class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while 1:
            try:
                url, filename = self.queue.get_nowait()
            except Queue.Empty:
                raise SystemExit
            #dirname = os.path.dirname(filename)
            #fp = open(dirname, "wb")\
            #url = urllib.quote(url.encode('utf-8'))
            fp = open(os.getcwd()+'/'+filename, "wb")
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.FOLLOWLOCATION, 1)
            curl.setopt(pycurl.MAXREDIRS, 5)
            curl.setopt(pycurl.CONNECTTIMEOUT, 30)
            curl.setopt(pycurl.TIMEOUT, 300)
            curl.setopt(pycurl.NOSIGNAL, 1)
            curl.setopt(pycurl.WRITEDATA, fp)
            try:
                curl.perform()
            except:
                import traceback
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
            curl.close()
            fp.close()
            sys.stdout.write(".")
            sys.stdout.flush()

# Start a bunch of threads
threads = []
for dummy in range(num_conn):
    t = WorkerThread(queue)
    t.start()
    threads.append(t)

# Wait for all threads to finish
for thread in threads:
    thread.join()
Why not use requests in lieu of pycurl, which would make your run method:
def run(self):
    while True:
        try:
            url, filename = self.queue.get_nowait()
        except Queue.Empty:
            raise SystemExit
        with open(os.getcwd()+'/'+filename, "wb") as fp:
            #fp.write(requests.get(url).content)
            fp.write(requests.get(url, headers={'user-agent': 'CodeGuru'}).content)
I made a few other stylistic changes.
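One more thing that may be worth checking (my guess, not something raised above): pycurl error 3 is CURLE_URL_MALFORMAT, and the lines coming out of list.txt still carry their trailing newline when they are put on the queue, which libcurl rejects as an illegal character. Stripping each line before queueing it may help, whichever HTTP client you end up using; a small sketch based on the queue-filling loop from the question:

queue = Queue.Queue()
with open('list.txt') as f:
    for line in f:
        url = line.strip()  # drop the trailing newline/whitespace before it reaches curl or requests
        if url:
            queue.put((url, 'test.mp4'))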
I am trying to implement a multithreaded crawler that takes an initial URL, searches for links within that page, displays each link, and at the same time looks for links within each of those links.
This is my code
import urllib.request, re, threading, csv
from queue import Queue
from bs4 import BeautifulSoup
from sys import exit

class a3_6:
    __url_q = Queue(100)
    __html_q = Queue()
    __data_q = Queue()
    __visited_urls = []

    def __init__(self, start_url, max_threads):
        self.__url_q.put(start_url)
        self.max_threads = max_threads

    def gethtml(self,url):
        try:
            req=urllib.request.Request(url)
            html=urllib.request.urlopen(req).read()
            self.__html_q.put(html)
        except urllib.error.URLError as e:
            print(e.reason)
        except:
            print("invalid: " + url)
        self.__visited_urls.append(url)

    def mine_thread(self):
        while True:
            if not self.__html_q.empty():
                soup = BeautifulSoup(self.__html_q.get(),"html.parser")
                for a in soup.find_all('a', href=True):
                    if a not in self.__visited_urls:
                        link='https://en.wikipedia.org'+a.get('href')
                        self.__url_q.put(link)
                        self.__data_q.put(link)
            else:
                break

    def store(self):
        while True:
            if not self.__data_q.empty():
                print (self.__data_q.get())

    def download_thread(self):
        while True:
            if not self.__url_q.empty():
                self.gethtml(self.__url_q.get())
            else:
                break

    def run(self):
        self.download_thread()
        self.mine_thread()
        self.store()

    def op(self):
        for x in range(self.max_threads):
            t = threading.Thread(target=self.run)
            t.daemon = True
            t.start()
        self.store()

if __name__ == '__main__':
    a=a3_6('https://en.wikipedia.org/wiki/Main_Page', 5)
    a.op()
EDIT: I edited the code and now I am getting proper results, but it still does not end.
I arrived at the solution with James Harrison's help. I don't know why he deleted his original answer, but here it is:
import urllib.request, threading
from urllib.parse import urlparse  # needed for the urlparse() call in gethtml
from queue import Queue
from bs4 import BeautifulSoup
from sys import exit
from a3_3 import store_to_db

class a3_5:
    __url_q = Queue(100)
    __html_q = Queue()
    __data_q = Queue()
    __visited_urls=[]

    def gethtml(self,url):
        try:
            req=urllib.request.Request(url)
            html=urllib.request.urlopen(req).read()
            self.__html_q.put(html)
            pars=urlparse(url)
        except urllib.error.URLError as e:
            print(e.reason+':'+url)
        except:
            print("invalid: " + url)

    def mine_thread(self):
        while True:
            if not self.__html_q.empty():
                soup = BeautifulSoup(self.__html_q.get(),"html.parser")
                for a in soup.find_all('a', href=True):
                    link=a.get('href')
                    """if not link.startswith('www'):
                        link=self.__prfx+link"""
                    if link not in self.__visited_urls:
                        self.__url_q.put(link)
                        self.__data_q.put(link)
            else:
                break

    def store(self):
        while True:
            if not self.__data_q.empty():
                cont=self.__data_q.get()
                print (cont)
            else:
                break

    def download_thread(self):
        while True:
            if not self.__url_q.empty():
                self.gethtml(self.__url_q.get())
                self.__url_q.task_done()

    def op(self,*urls):
        for x in range(25):
            d = threading.Thread(target=self.download_thread)
            d.setDaemon(True)
            d.start()
        for url in urls:
            self.__url_q.put(url)
        self.__url_q.join()
        self.mine_thread()
        self.store()

if __name__ == '__main__':
    urls=['https://en.wikipedia.org/wiki/Bajirao']#,'https://en.wikipedia.org/wiki/Malharrao_Holkar','https://en.wikipedia.org/wiki/Ranoji_Scindia']
    a=a3_5()
    a.op(*urls)
Essentially I had to arrange another queue where I had to set up the workers to activate the threads. Also, the mine_thread and store methods needed to start after the download_thread method completed, because otherwise the values wouldn't get stored.
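For reference, the core of that arrangement is the standard queue-plus-daemon-worker pattern: the workers pull from the Queue and call task_done() for every item, and the main thread blocks on join() until everything that was put() has been processed. A stripped-down sketch of just that pattern (the worker count and URL list are placeholders):

import threading
from queue import Queue

url_q = Queue()

def download_worker():
    # Daemon worker: loops forever pulling URLs; exits when the program does.
    while True:
        url = url_q.get()
        print("processing", url)  # fetching/parsing would happen here
        url_q.task_done()         # tell join() this item is finished

for _ in range(4):  # placeholder worker count
    threading.Thread(target=download_worker, daemon=True).start()

for url in ['https://en.wikipedia.org/wiki/Bajirao']:  # placeholder URL list
    url_q.put(url)

url_q.join()  # returns only after every queued URL has been marked task_done()
print("all queued URLs processed; mining/storing can start now")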