Multithreading not executing tasks simultaneously - Python

I wrote a Python script for my project that stores the names of the files on an SFTP server in a list, does the same for a local directory, compares the two lists, and then downloads the files that differ. The script takes its server credentials from a separate text file in the following format:
CONTENTS OF TEXT FILE (config9.txt)
127.0.0.1,username,password,log22.txt,/new45,C:\Users\udit\Desktop\ftp2\bob16
My program processed the IPs one by one. I wanted it to run in parallel for all IPs using multithreading, so I modified my code, but it still executes them one by one. What is the error in it?
import os
import pysftp
import csv
import socket
from stat import S_IMODE, S_ISDIR, S_ISREG
import time
import threading
from threading import Thread
from time import sleep
import os.path
import shutil
import fileinput
cnopts = pysftp.CnOpts()
cnopts.hostkeys = None
def mainprocess():
    with open("config9.txt", "r") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
            print(row)
            IP = row[0]
            myUsername = row[1]
            myPassword = row[2]
            txtfile = row[3]
            remotepath = row[4]
            localpath = row[5]
            with pysftp.Connection(host=IP, username=myUsername, password=myPassword, cnopts=cnopts) as sftp:
                r = str(socket.gethostbyaddr(IP))
                print("connection successful with " + r)

                def get_file2(sftp, remotedir):
                    result = []
                    for entry in sftp.listdir_attr(remotedir):
                        remotepath = remotedir + "/" + entry.filename
                        mode = entry.st_mode
                        if S_ISDIR(mode):
                            result += get_file2(sftp, remotepath)
                        elif S_ISREG(mode):
                            if (time.time() - entry.st_mtime) // (24 * 3600) > 0:
                                result.append(entry.filename)
                    return result

                remote_path = remotepath
                d = get_file2(sftp, remote_path)

                def process():
                    myName = []
                    with open(filename, 'r+') as f:
                        for name in f.readlines():
                            while '\n' in name:
                                name = name.replace('\n', '')
                            myName.append(name)
                    print(myName)
                    print(len(myName))
                    filtered_list = [string for string in d if string not in myName]
                    print("filtered list:", filtered_list)
                    print(len(filtered_list))
                    local_path = localpath

                    def compare_files(sftp, remotedir, remotefile, localdir, preserve_mtime=True):
                        remotepath = remotedir + "/" + remotefile
                        localpath = os.path.join(localdir, remotefile)
                        mode = sftp.stat(remotepath).st_mode
                        if S_ISDIR(mode):
                            try:
                                os.mkdir(localpath, mode=777)
                            except OSError:
                                pass
                            compare_files(sftp, remotepath, localpath, preserve_mtime)
                        elif S_ISREG(mode):
                            sftp.get(remotepath, localpath, preserve_mtime=True)

                    for file in filtered_list:
                        compare_files(sftp, remote_path, file, local_path, preserve_mtime=False)
                    with open(filename, 'a') as f:
                        for item in filtered_list:
                            f.write("%s\n" % item)

                filename = txtfile
                try:
                    file = open(filename, 'r')
                    process()
                except IOError:
                    file = open(filename, 'w')
                    process()
t = time.time()
t1 = threading.Thread(target=mainprocess(), args=())
t1.start()
t1.join()
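For what it's worth, the likely culprit is the last block: target=mainprocess() calls mainprocess immediately in the main thread and hands its return value (None) to Thread, and even with target=mainprocess a single thread would still walk all the rows sequentially. A minimal sketch of one thread per row, assuming the per-IP work from inside the loop is factored into a hypothetical process_row(row) function:

import csv
import threading

def process_row(row):
    # Hypothetical: the body of mainprocess()'s per-IP loop
    # (connect to row[0] with pysftp, compare lists, download files).
    pass

threads = []
with open("config9.txt", "r") as csv_file:
    for row in csv.reader(csv_file, delimiter=','):
        # Pass the function itself, without parentheses, so it runs in the new thread.
        t = threading.Thread(target=process_row, args=(row,))
        t.start()
        threads.append(t)
for t in threads:
    t.join()  # wait for all transfers to finish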

Related

Get Python code to persist after IndexError

I am querying a website's API. The API goes down for maintenance from time to time, and there may also be no data available to query at times. I wrote the code to keep querying the API even after an error, but it doesn't seem to be working.
The following is the code:
import threading
import json
import urllib
from urllib.parse import urlparse
import httplib2 as http #External library
import datetime
import pyodbc as db
import os
import gzip
import csv
import shutil
def task():
    # Authentication parameters
    headers = {'AccountKey': 'secret',
               'accept': 'application/json'}  # this is by default
    # API parameters
    uri = 'http://somewebsite.com/'  # Resource URL
    path = '/something/TrafficIncidents?'
    # Build query string & specify type of API call
    target = urlparse(uri + path)
    print(target.geturl())
    method = 'GET'
    body = ''
    # Get handle to http
    h = http.Http()
    # Obtain results
    response, content = h.request(target.geturl(), method, body, headers)
    api_call_time = datetime.datetime.now()
    filename = "traffic_incidents_" + str(datetime.datetime.today().strftime('%Y-%m-%d'))
    createHeader = 1
    if os.path.exists(filename + '.csv'):
        csvFile = open(filename + '.csv', 'a')
        createHeader = 0
    else:
        # compress previous day's file
        prev_filename = "traffic_incidents_" + (datetime.datetime.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
        if os.path.exists(prev_filename + '.csv'):
            with open(prev_filename + '.csv', 'rb') as f_in, gzip.open(prev_filename + '.csv.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(prev_filename + '.csv')
        # create new csv file for writing
        csvFile = open(filename + '.csv', 'w')
    # Parse JSON to print
    jsonObj = json.loads(content)
    print(json.dumps(jsonObj, sort_keys=True, indent=4))
    with open("traffic_incidents.json", "w") as outfile:
        # Saving jsonObj["d"]
        json.dump(jsonObj, outfile, sort_keys=True, indent=4, ensure_ascii=False)
    for i in range(len(jsonObj["value"])):
        jsonObj["value"][i]["IncidentTime"] = jsonObj["value"][i]["Message"].split(' ', 1)[0]
        jsonObj["value"][i]["Message"] = jsonObj["value"][i]["Message"].split(' ', 1)[1]
        jsonObj["value"][i]["ApiCallTime"] = api_call_time
    # Save to csv file
    header = jsonObj["value"][0].keys()
    csvwriter = csv.writer(csvFile, lineterminator='\n')
    if createHeader == 1:
        csvwriter.writerow(header)
    for i in range(len(jsonObj["value"])):
        csvwriter.writerow(jsonObj["value"][i].values())
    csvFile.close()
    t = threading.Timer(120, task)
    t.start()

while True:
    try:
        task()
    except IndexError:
        pass
    else:
        break
I get the following error and the program stops:
header = jsonObj["value"][0].keys()
IndexError: list index out of range
I would like the program to keep running even after the IndexError has occurred. How can I edit the code to achieve that?
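One hedged observation: the while True loop only protects the very first call. task() reschedules itself with threading.Timer(120, task), and those timer-thread invocations run outside the try/except, so an IndexError there (e.g. when jsonObj["value"] is empty) kills that cycle. A minimal sketch that keeps the guard and the rescheduling together, assuming the work is factored into a hypothetical task_body() (task() minus the Timer lines):

import threading

def safe_task():
    try:
        task_body()  # hypothetical: the body of task() without the rescheduling
    except IndexError:
        pass  # jsonObj["value"] was empty; skip this cycle
    finally:
        threading.Timer(120, safe_task).start()  # always re-arm the timer

safe_task()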

How can I execute a Python script over a 1TB log file in the command line

I have a log file that is about 1 TB in size. I am unsure how to run this Python script from the command line. I use the sys library, but my CSV data is still not added.
Below is my Python code.
import re
import sys
from csv import writer
import datetime

log_file = '/Users/kiya/Desktop/mysql/ipscan/ip.txt'
output_file = '/Users/kiya/Desktop/mysql/ipscan/output.csv'

try:
    ip_file = sys.argv[1]
except Exception:
    print("usage: python3 {} [ip file]".format(sys.argv[0]))
    sys.exit()

name_to_check = 'MBX_AUTHENTICATION_FAILED'

with open(log_file, encoding="utf-8") as infile:
    for line in infile:
        if name_to_check in line:
            username = re.search(r'(?<=userName=)(.*)(?=,)', line)
            username = username.group()
            ip = re.search(r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])', line)
            ip = ip.group()
            with open(output_file, 'a') as outfile:
                outfile.write('{username},{ip}\n'.format(username=username, ip=ip))
Try this, it works well; if the problem persists, check your search regex:
import re
import datetime
from sys import argv

log_file = ""
if len(argv) > 1:
    log_file = argv[1]
else:
    quit("No log_file specified, exiting script.")

# name_to_check and output_file as defined in the question
name_to_check = 'MBX_AUTHENTICATION_FAILED'
output_file = '/Users/kiya/Desktop/mysql/ipscan/output.csv'

with open(log_file, encoding="utf-8") as infile:
    for line in infile:
        if name_to_check in line:
            username = re.search(r'(?<=userName=)(.*)(?=,)', line)
            username = username.group()
            date = re.search(r'(?P<date>\d{8})\s+(?P<time>\d{9})\+(?P<zone>\d{4})', line)
            date = datetime.datetime.strptime(date.group('date'), "%Y%m%d").strftime("%Y-%m-%d")
            print(date)
            time = re.search(r'(?P<date>\d{8})\s+(?P<time>\d{9})\+(?P<zone>\d{4})', line)
            time = datetime.datetime.strptime(time.group('time'), "%H%M%S%f").strftime("%H:%M:%S")
            print(time)
            ip = re.search(r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])', line)
            ip = ip.group()
            with open(output_file, "ab", buffering=0) as outfile:
                outfile.write(("{},{},{},{}\n".format(username, date, time, ip)).encode())

Show progress bar for FTP upload in Python

So I'm writing a script that will search a folder recursively for .mkv files and upload them to my NAS. I have the script working, but I can't see the progress. I imported a progress bar I found on GitHub and was able to use the demo to see it work. It is what I want; however, the FTP example they included retrieves a file from the server, and I need to upload.
How do I get the uploaded amount at an interval so I can update the progress bar?
Below is the code I have that works for the upload:
import os
import ftplib
import ntpath

ntpath.basename("a/b/c")

def path_leaf(path):
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)

from glob import glob
FileTransferList = [y for x in os.walk('/tmp/rippedMovies') for y in glob(os.path.join(x[0], '*.mkv'))]

global ftp

def FTP_GLOB_transfer(URL, UserName, Password):
    ftp = ftplib.FTP(URL, UserName, Password)  # connect to host, default port
    print URL, UserName, Password
    for file in FileTransferList:
        FileName = path_leaf(file)
        print file
        TheFile = open(file, 'rb')  # binary mode for storbinary
        ftp.storbinary('STOR ' + FileName, TheFile, 1024)
        TheFile.close()
    ftp.quit()
    ftp = None

FTP_GLOB_transfer('<IP>', '<USER>', '<PASSWORD>')
I figured it out. I decided to use tqdm, as I found its documentation easier to read. I assumed storbinary() had to return something to report its progress; I just didn't know I was looking for a callback.
Anyway, I added a new import: from tqdm import tqdm.
I added filesize = os.path.getsize(file) to get the size of the file.
Then I replaced ftp.storbinary('STOR ' + FileName, TheFile, 1024) with this code:
with tqdm(unit='blocks', unit_scale=True, leave=False, miniters=1, desc='Uploading......', total=filesize) as tqdm_instance:
    ftp.storbinary('STOR ' + FileName, TheFile, 2048, callback=lambda sent: tqdm_instance.update(len(sent)))
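(For reference: ftplib's storbinary invokes the callback once for each block of data after it is sent, passing the block itself, which is why summing len(sent) across calls tracks the total bytes uploaded.)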
And overall, the new working code looks like:
import os
import ftplib
import ntpath
from tqdm import tqdm

ntpath.basename("a/b/c")

def path_leaf(path):
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)

from glob import glob
FileTransferList = [y for x in os.walk('/tmp/rippedMovies') for y in glob(os.path.join(x[0], '*.mkv'))]

global ftp

def FTP_GLOB_transfer(URL, UserName, Password):
    ftp = ftplib.FTP(URL, UserName, Password)  # connect to host, default port
    print URL, UserName, Password
    for file in FileTransferList:
        FileName = path_leaf(file)
        filesize = os.path.getsize(file)
        print file
        TheFile = open(file, 'rb')  # binary mode for storbinary
        with tqdm(unit='blocks', unit_scale=True, leave=False, miniters=1, desc='Uploading......', total=filesize) as tqdm_instance:
            ftp.storbinary('STOR ' + FileName, TheFile, 2048, callback=lambda sent: tqdm_instance.update(len(sent)))
        TheFile.close()
    ftp.quit()
    ftp = None
It now outputs as:
/tmp/rippedMovies/TestMovie.mkv
Uploading......: 51%|████████████████████▉ | 547M/1.07G [00:05<00:14, 36.8Mblocks/s]

Create byte array for zip file

I'm trying to import a zip file into Confluence with the RPC importSpace object, but it keeps giving errors. Atlassian provides the following documentation for this:
public boolean importSpace(String token, byte[] importData)
I have created a small Python script that loops through a file where the zip filenames are saved:
#!/usr/bin/python
import os
import xmlrpclib
import time
import urllib

confluence2site = "https://confluence"
server = xmlrpclib.ServerProxy(confluence2site + '/rpc/xmlrpc')
username = ''
password = ''
token = server.confluence2.login(username, password)
loginString = "?os_username=" + username + "&os_password=" + password
filelist = ""
failureList = []
start = True

with open('exportedspaces.txt') as f:
    for file in f:
        try:
            print file
            zipFile = open(os.curdir + "\\zips\\" + file, 'rb')
            fileHandle = zipFile.read()
            zipFile.close()
            server.confluence2.importSpace(token, xmlrpclib.Binary(fileHandle))
        except:
            print file + " failed to restore"
            failureList.append(file)
Where does it go wrong?
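A hedged guess at the failure: iterating over exportedspaces.txt yields each filename with its trailing newline still attached, so open() targets a path that doesn't exist, and the bare except hides the real error. A minimal sketch of the loop with the name stripped and the exception surfaced:

with open('exportedspaces.txt') as f:
    for file in f:
        file = file.strip()  # drop the trailing newline before building the path
        if not file:
            continue
        try:
            zipFile = open(os.path.join(os.curdir, "zips", file), 'rb')
            data = zipFile.read()
            zipFile.close()
            server.confluence2.importSpace(token, xmlrpclib.Binary(data))
        except Exception as e:  # surface the real reason instead of swallowing it
            print file + " failed to restore: " + str(e)
            failureList.append(file)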

Output to one of two files after if statement evaluation

I am using the Python nmap module to do certificate discovery and monitoring.
import nmap
import time
import datetime
from contextlib import redirect_stdout
from datetime import date
import itertools
This is the function that manages the nmap scan.
SSLmonitor = nmap.PortScanner()

def SSLmon(IPaddress):
    now = datetime.datetime.now()
    filename = now.strftime("/results/%Y-%m-%dSSLmonWeekly.txt")
    filename2 = now.strftime("/results/%Y-%m-%dSSLmonWeeklyExpiring.txt")
    results = SSLmonitor.scan(hosts=IPaddress, arguments='--script=ssl-cert -p 443')
    # If statement checks to see if the last scanned address has ['scan'].
    #print(results.keys())
    if 'scan' in results:
        hosts = results['scan']
        #print(hosts)
        # If the host had a cert on 443, process it; otherwise pass.
        try:
            for host in hosts:  # Chunk through the dictionary to get the key/value pairs we want.
                try:
                    # Get the information for each host in the dictionary.
                    cert = hosts[host]['tcp'][443]['script']['ssl-cert']
                    cert2 = cert.replace('Not valid after: ', '~')
                    indexed = cert2.index('~')
                    if datetime.date(int(cert2[indexed+1:indexed+5]), int(cert2[indexed+6:indexed+8]), int(cert2[indexed+9:indexed+11])) - datetime.date.today() < datetime.timedelta(days = 30):
                        with open(filename, 'a') as f:
                            with redirect_stdout(f):
                                print("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n'))
                    else:
                        with open(filename2, 'a') as e:
                            with redirect_stdout(e):
                                print("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n'))
                except Exception:
                    pass
        except Exception:
            pass
I'm looping through a list of IP addresses that I know have certs on port 443 and running them through the scanner.
#--------------------------------------------------------------
# Iterate through list of hosts with discovered certs
#--------------------------------------------------------------
with open("/weeklyscanlist/DiscoveredCertsByIP.txt", "r") as text_file:
for line in itertools.islice(text_file, 1, 4250):
SSLmon(str(line))
When I was handling output like this:
if datetime.date(int(expDate[0]), int(expDate[1]), int(expDate[2])) - datetime.date.today() < datetime.timedelta(days = 30):
    print("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n'), "this cert is expiring soon")
else:
    print("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n'), "this cert is good for a while")
it was working fine, so I know the problem is the way I'm writing the output to a file, but I can't find a way to handle it.
I've also tried
if datetime.date(int(expDate[0]), int(expDate[1]), int(expDate[2])) - datetime.date.today() < datetime.timedelta(days = 30):
    fn = open(filename2, 'a')
    fn.write("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n'))
    fn.close()
else:
    f = open(filename, 'a')
    f.write("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n'))
    f.close()
without success.
Here is what you should do, following up on deweyredman’s sage advice to use string formatting:
Use string formatting to generate the output line of the file
Use an if-then to pick the filename
Only use one file write block to stay DRY
dt_diff = datetime.date(int(expDate[0]), int(expDate[1]), int(expDate[2])) - datetime.date.today()
which_filename = filename if dt_diff < datetime.timedelta(days = 30) else filename2
with open(which_filename, 'a') as f:
    line = '\n%s %s' % (
        IPaddress,
        cert.replace(':', '='
                     ).replace('commonName=', '\ncommonName='
                     ).replace('/', '\n'),)
    f.write(line)
It seems your args to f.write are incorrect...try:
if datetime.date(int(expDate[0]), int(expDate[1]), int(expDate[2])) - datetime.date.today() < datetime.timedelta(days = 30):
    fn = open(filename2, 'a')
    fn.write("{} {} {}".format("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n')))
    fn.close()
else:
    f = open(filename, 'a')
    f.write("{} {} {}".format("\n", IPaddress, cert.replace(':', '=').replace('commonName=', '\ncommonName=').replace('/', '\n')))
    f.close()
f.write takes one argument...you're passing it three.
