python multithreading in loops - python

I think I have a very basic understanding of how threading works, but since I don't know that much I can't figure this out. I want to have a pool limit of about 10 threads, but the tricky part is I don't know how to make it read line by line.
proxies = {
'http': 'http://123.10.210.213:9999',
'https': 'http://123.10.210.213:9999'
}
def create_proxy_lst(txt):
print("""
########################################
# WORKING | NOT WORKING #
########################################
""")
proxy_list = []
with open(txt) as f:
for line in f:
proxy_list.append(line.strip('\n'))
return proxy_list
def check_proxy(website="https://google.com/"):
working = 0
not_working = 0
total = 0
lst = create_proxy_lst("uncheckedproxys.txt")
for proxy in lst:
try:
proxies["https"] = "http://" + proxy
proxies["http"] = "http://" + proxy
r = requests.get(website, timeout=1, proxies=proxies)
if r.status_code == 200:
print("%s" % proxy)
working += 1
total += 1
os.system("title Working: " + str(working) + "\t Not working " + str(not_working) + " ✔" + " Total: " + str(total) + "/" + str(len(lst)))
except Exception:
print("\t\t %s" % proxy)
not_working += 1
total += 1
os.system("title Working: " + str(working) + "\t Not working " + str(not_working) + " ✖" + " Total: " + str(total) + "/" + str(len(lst)))

Put your proxies into a Queue.Queue(), and then start 10 threads to read proxy from the queue.
In your case:
from Queue import Queue
from threading import Thread
def worker(proxy_queue):
while not proxy_queue.empty():
proxy = proxy_queue.get()
working = 0
not_working = 0
total = 0
try:
proxies["https"] = "http://" + proxy
proxies["http"] = "http://" + proxy
r = requests.get(website, timeout=1, proxies=proxies)
if r.status_code == 200:
print("%s" % proxy)
working += 1
total += 1
os.system("title Working: " + str(working) + "\t Not working " + str(not_working) + " ✔" + " Total: " + str(total) + "/" + str(len(lst)))
except Exception:
print("\t\t %s" % proxy)
not_working += 1
total += 1
os.system("title Working: " + str(working) + "\t Not working " + str(not_working) + " ✖" + " Total: " + str(total) + "/" + str(len(lst)))
if __name__ == '__main__':
# Build a queue
proxy_queue = Queue()
# Put these proxies into the queue
with open("uncheckedproxys.txt") as f:
for line in f:
proxy_queue.put(line.strip())
# Create thread pool
thread_pool = [Thread(target=worker, args=proxy_queue) for i in range(10)]
# Start threads
for thread in thread_pool:
thread.start()

Related

Stock flow log connections in Lambda

I'm actually doing a python script in AWS Lambda to transform statless logs from Flow Logs to statfull logs.
But I've some difficulties to save connections inside Lambda.
Variable "stock" is my list which will save connections
def init():
global stock
stock = []
def lambda_handler(event, context):
# retrieve bucket name and file_key from the S3 event
bucket_name = event['Records'][0]['s3']['bucket']['name']
file_key = event['Records'][0]['s3']['object']['key']
logger.info('Reading {} from {}'.format(file_key, bucket_name))
# get the object
obj = s3.get_object(Bucket=bucket_name, Key=file_key)
if obj['ContentType'] =='application/x-gzip' or key.endswith('.gz'):
lines_read = gzip.decompress(obj['Body'].read())
else:
lines_read = obj['Body'].read()
lines_read = lines_read[3:-3]
lines = lines_read.decode().split("\n")
for line in lines:
lines_split = line.split(" ")
if lines_split[17] == "3" or lines_split[17] == "19":
ts = str(time.time())
line_whrite = ts + " " + "Short Connection " + lines_split[5] + " " + lines_split[6] + " " + lines_split[7] + " " + lines_split[8] + " " + lines_split[16]
elif lines_split[17] == "1":
i = 0
ts = str(time.time())
json2 = {"time": lines_split[15], "srcaddr": lines_split[5], "dstaddr": lines_split[6], "srcport": lines_split[7], "dstport": lines_split[8]}
for connection in stock:
if json2["srcaddr"] == connection["srcaddr"] or connection["dstaddr"] and json2["dstaddr"] == connection["srcaddr"] or connection["dstaddr"] and json2["srcport"] == connection["srcport"] or connection["dstport"] and json2["dstport"] == connection["srcport"] or connection["dstport"]:
tpsstart = int(json2["time"])
tpsend = int(connection["time"])
converted_tpsstart = datetime.datetime.fromtimestamp(round(tpsstart / 1000))
converted_tpsend = datetime.datetime.fromtimestamp(round(tpsend / 1000))
tpsdiff = converted_tpsend - converted_tpsstart
line_whrite = "Connection Start :" + str(converted_tpsstart) + " Connection End:" + str(converted_tpsend) + " Connection Duration:" + str(tpsdiff) + " " + json2["srcaddr"] + " " + json2["dstaddr"] + " " + json2["srcport"] + " " + json2["dstport"]
del stock[i]
print(line_whrite)
i = i + 1
The problem is (I think) that my variable "stock" is always reset so empty...
Do you have some idea ?
Thanks :)

using a variable from a json.load in another function

I am using an API for receiving TTN data from a device. i have created a function "def on_message(mqttc, obj, msg):", using a json.loads(msg.payload.decode('utf-8')) to receive mqtt data.
I want to pick the variable "node_data" and use in def devdata(). but it seems that i do not get anything but None.
import paho.mqtt.client as mqtt
import json
import pybase64
import binascii
APPEUI = "0018B24441524632"
APPID = "adeunis_fieldtester"
PSW = "ttn-account-v2.vuQczD1bmPoghhaKjlIHR-iHovHIbYMpfWSKosPAGaU"
# Call back functions
# gives connection message
def on_connect(mqttc, mosq, obj, rc):
print("Connected with result code:" + str(rc))
# subscribe for all devices of user
mqttc.subscribe('+/devices/+/up')
# gives message from device
def on_message(mqttc, obj, msg):
try:
x = json.loads(msg.payload.decode('utf-8'))
# metadata
app = x["app_id"]
device = x["dev_id"]
deveui = x["hardware_serial"]
port = x["port"]
confirmed = x["confirmed"]
counter = x["counter"]
payload_fields = x["payload_raw"]
datetime = x["metadata"]["time"]
gateways = x["metadata"]["gateways"]
frequency = x["metadata"]["frequency"]
modulation = x["metadata"]["modulation"]
data_rate = x["metadata"]["data_rate"]
air_time = x["metadata"]["airtime"]
coding_rate = x["metadata"]["coding_rate"]
for gw in gateways:
gateway_id = gw["gtw_id"]
timestamp = gw["timestamp"]
time = gw["time"]
channel = gw["channel"]
gwrssi = gw["rssi"]
gwsnr = gw["snr"]
# decoding the payload_field
payload_r = (pybase64.b64decode(payload_fields + "="))
# decoding the Payload_r to Byte-Ascii string
payload_h = binascii.hexlify(payload_r)
# Byte to tekst
node_data = (payload_h.decode("utf-8)")) #this is the variable i would like to use in devdata()
# Printing data, + str(payload_fields) + ", "
print(str(app) + ", " + str(device) + ", " + str(deveui) + ", " + str(port) + ", " + str(counter) + ", "
+ str(node_data) + ", " + str(modulation) + ", " + str(datetime) + ", " + str(frequency) + ", "
+ str(confirmed) + ", " + str(data_rate) + ", " + str(air_time) + ", " + str(coding_rate) + ", "
+ str(gateway_id) + ", " + str(timestamp) + ", " + str(time) + "," + str(channel) + ", "
+ str(gwrssi) + ", " + str(gwsnr))
return node_data #return data for use in devdata()
except Exception as e:
print(e)
pass
def devdata():
node_data = on_message() # trying to use this as the Variable
temperatur = int(node_data[2:4], 16)
print("temperatur =", temperatur)
`

Starting threads in a for cycle produces multiple results

-I found the problem!- In function SendMessage I was using UserID (with capital letters) instead of userid (which was the actual parameter passed to each thread). So Python printed the UserID of the for cycle instead of the "individual" userid passed to the different functions. It was only a logging problem, the program sent messages correctly.
I have a for that loops through the elements of a user's list. Each iteration, I would like to start a separate background thread to send a message to that user. By saying "send a message" I mean a simple POST request made using the requests Python lib. At the end of the thread, an output on the console is written. Every 24 requests (so every 24 threads) the app needs to stop for about a second.
Success = 0
Bounces = 0
def SendMessage(botid, token, userid, messageid, tag):
global Success
global Bounces
try:
payload = {...}
r = requests.post("...", params=payload, headers=head, timeout=2)
#problem with request?
pjson = json.loads(r.text)
if r.status_code != 200:
log(str(r.status_code) + " " + pjson["result"] + " UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
Bounces += 1
return
Success += 1
return
except requests.exceptions.Timeout:
#wait for connection to be available again!
while not conn_available():
print("... Waiting for a new connection...")
time.sleep(10)
log("Request timed out. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
Bounces += 1
except requests.exceptions.ConnectionError:
log("Unable to connect. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
Bounces += 1
except requests.exceptions.HTTPError:
log("Invalid request. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
Bounces += 1
except requests.exceptions.RequestException:
log("Invalid request. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
Bounces += 1
while True:
newMsgsReq = ""
try:
#Check for new messages
newMsgsReq = requests.get("...", timeout=2)
if newMsgsReq.text == "false":
#exit sub
time.sleep(2)
continue
except requests.exceptions.HTTPError as errh:
log("Request has failed: There was an error in the request: [" + str(errh) + "]")
time.sleep(2)
continue
except requests.exceptions.ConnectionError as errc:
log("Request has failed: check internet connection & retry: [" + str(errc) + "]")
time.sleep(2)
continue
except requests.exceptions.Timeout as errt:
log("Request has failed: check internet connection & retry: [" + str(errt) + "]")
time.sleep(2)
continue
except requests.exceptions.RequestException as err:
log("Request has failed: There was an error in the request: [" + str(err) + "]")
time.sleep(2)
continue
#we have a message!!!
#Extract BotID, Token, MessageID
msgInf = newMsgsReq.text.split("|")
MessageID = msgInf[0]
BotID = msgInf[1]
Token = msgInf[2]
Tag = msgInf[3]
del msgInf[0:4]
suc("New message found: " + str(MessageID))
suc("Total recipients: " + str(len(msgInf)))
#Begin send!
Cycles = 0
TotCycles = 0
#Loop through msgInf
for UserID in msgInf:
#Create the thread.
process = threading.Thread(target=SendMessage, args=[BotID, Token, UserID, MessageID, Tag])
process.start()
TotCycles += 1
pb.print_progress_bar(TotCycles)
Cycles += 1
if Cycles == 24:
time.sleep(1)
Cycles = 0
suc("Message " + str(MessageID) + " sent successfully (" + str(Success) + " success, " + str(Bounces) + " bounces")
Success = 0
Bounces = 0
time.sleep(3)
Let's say my user list is:
{1, 2, 3, 4, ..., 24, 25, ...}. I expect my application to output:
1. Message 1 sent successfully...
2. Message 2 sent successfully...
...
24. Message 24 sent successfully.
Instead, I am getting this output:
1. Message 1 sent successfully.
2. Message 1 sent successfully.
...
24. Message 1 sent successfully.
So all the 24 outputs are related to the first of the 24 ids. It seems like the for loop does not proceed...
This prints the incremented counter without any trouble so I think you may need to provide all of the code and some sample input.
import threading
import time
def SendMessage(userid):
print(userid)
while True:
cycles = 1
for user_id in [1, 2, 3]:
process = threading.Thread(target=SendMessage, args=[user_id])
process.start()
cycles += 1
if cycles == 24:
time.sleep(1)
cycles = 0
time.sleep(3)
Run it on repl.it

why there is a "SyntaxError: invalid syntax"

Error message:
runfile('//rschfs1x/userrs/xd73_RS/Documents/Python Scripts/test_515.py', wdir='//rschfs1x/userrs/xd73_RS/Documents/Python Scripts')
File "//rschfs1x/userrs/xd73_RS/Documents/Python Scripts/test_515.py", line 120
if __name__ == "__main__":
^
SyntaxError: invalid syntax
Really not sure why thre is this problem
def sendemail(alertmessage):
msg = MIMEText(alertmessage)
msg['Subject'] = 'Patent Data Error Message'
msg['From'] = "patentcornell#gmail.com"
msg['To'] = "patentcornell#gmail.com"
# Credentials (if needed)
username = 'patentcornell#gmail.com'
password = ''
# The actual mail send
server = smtplib.SMTP('smtp.gmail.com:587')
server.starttls()
server.login(username,password)
server.sendmail("hello", "xd73#cornell.edu", alertmessage)
server.sendmail("hello", "jq58#cornell.edu", alertmessage)
#server.sendmail("hello", "sl2448#cornell.edu", alertmessage)
server.sendmail("hello", "patentcornell#gmail.com", alertmessage)
'''
Shanjun Li <sl2448#cornell.edu>,
Panle Barwick <pjb298#cornell.edu>,
Jing Qian <jq58#cornell.edu>
'''
server.quit()
def main(year = 1985, starting_page = 2, filepath = ""):
time1 = time.time()
print "start timing: " + str(time1)
driver = webdriver.Firefox()
driver.get("http://epub.sipo.gov.cn/")
elem = driver.find_element_by_id("soso_text")
f = open( filepath + "year_test_" + str(year), "wb")
flog = open(filepath + "log_" + str(year), "wb")
driver.get("http://epub.sipo.gov.cn/")
elem = driver.find_element_by_id("soso_text")
elem.send_keys(str(year))
elem.send_keys(Keys.RETURN)
content = driver.page_source.encode('utf-8')
for uchar in content:
f.write(bytearray([ord(uchar)]))
flog.write(str(year) + " " + str(1) + "\n")
case = 1
nextpage = driver.find_element_by_xpath("/html/body/div[3]/div[2]/div[4]/a[7]")
turnto = driver.find_element_by_xpath("/html/body/div[3]/div[2]/div[4]/span")
print "hello 0"
print nextpage.get_attribute("innerHTML")
totalnum = int(nextpage.get_attribute("innerHTML"))
print "totalnum: " + str(totalnum)
#try:
# from which page we start downloading, the starting page
for i in range(starting_page, totalnum + 1):
timeinterval_1 = time.time()
print str(year) + " page: " + str(i)
#turnto = driver.find_element_by_xpath("/html/body/div[3]/div[2]/div[4]/span")
turnto = driver.find_element_by_id('pn')
turnto.send_keys(str(i))
turnto.send_keys(Keys.ENTER)
#turnto.submit()
content = driver.page_source.encode('utf-8')
# test file writing speed
time_file_start = time.time()
for uchar in content:
f.write(bytearray([ord(uchar)]))
f.write("\n")
#robust Check
print "interval: " + str(timeinterval_2 - timeinterval_1)
if timeinterval_2 - timeinterval_1 > 60:
flog.write("lost: " + str(year) + " page: " + str(i) + "\n")
print "too long to load " + str(year) + " " + str(i)
continue
else:
flog.write(str(year) + " " + str(i) + "\n")
continue
#except ValueError as err:
print(err.args)
sendmail("xd73_RS: " + err.args + " " + str(time.time())
if __name__ == "__main__":
filepath = "U:/Documents/Python Scripts/test_data_515/"
#sendemail("test email function!")
main(2010, 2, filepath)
This line:
sendmail("xd73_RS: " + err.args + " " + str(time.time())
Has unmatched parentheses. Generally if the syntax error occurs on line n, its a good idea to check the syntax on line n-1.

Type error: 'module' object has no attribute '_getitem_'

I am trying to fetch emails and then we need to write the mails in to different files according to the subject.
import email
import imaplib
from threading import Thread
from time import sleep
import time
def myThreadFun():
M = imaplib.IMAP4_SSL('imap.gmail.com')
M.login("noticeboard16#gmail.com", "embeddedSystems")
while (1):
M.select()
rv, data1 = M.search(None, 'UNSEEN')
for i in data1[0].split():
resp, data = M.FETCH(i, '(RFC822)')
mail = email.message_from_string(data[0][1])
for part in mail.walk():
# multipart are just containers, so we skip them
if part.get_content_maintype() == 'multipart':
continue
# we are interested only in the simple text messages
if part.get_content_subtype() != 'plain':
continue
payload = part.get_payload()
print '\n'
print '[%s]' % (mail['Subject'])
print 'From: %s' % (mail['From'])
print 'Date:', mail['Date']
print '================================='
print payload
#time.sleep(10)
#save_string = str("/home/buddhi-xubuntu/Python/email_" + ".text")
#myfile = open(save_string, 'a')
#myfile.write(mail['Subject']+ "\nFrom: " + mail['From'] + "\nDate: " + mail['Date'] + "=============\n" + payload)
#myfile.close()
#time.sleep(10)
#with file('email_.txt', 'r') as original: data = original.read()
#with file('email_2.txt', 'w') as modified: modified.write(mail['Subject']+ "\nFrom: " + mail['From'] + "\nDate: " + mail['Date'] + "\n=============\n" + payload + "\n" + data)
#orig_string = str("/home/e11452/Downloads/email_" + ".text")
#f = open(orig_string,'r')
#temp = f.read()
#f.close()
if mail['Subject']=="E/11":
new_string = str("/home/e11452/Downloads/email_11" + ".text")
f = open(new_string, 'w')
f.write(mail['Subject']+ "\nFrom: " + mail['From'] + "\nDate: " + mail['Date'] + "\n=============\n" + payload + "\n")
elif mail['Subject']=="E/10":
new_string = str("/home/e11452/Downloads/email_12" + ".text")
-f = open(new_string, 'w')
f.write(mail['Subject']+ "\nFrom: " + mail['From'] + "\nDate: " + mail['Date'] + "\n=============\n" + payload + "\n")
f.write(temp)
f.close()
time.sleep(10)
M.LOGOUT()
thread = Thread(target = myThreadFun)
thread.start()
above is the code i tried and i get an error saying
Traceback (most recent call last): File "email14.py", line 58, in if email['Subject'] == 'E/11': TypeError: 'module' object has no attribute 'getitem'
Seems that you misspell mail as email. email is a module you imported. Then you got the error.

Categories