How can I execute 1TB log file python script in command line

How can I execute 1TB log file python script in command line - python

I have a log file which consists the capacity of 1TB. I am uncertain that how to run this python script in the command line. I use the sys library but still my csv data is not added.
Below is my python code.
import re
import sys
from csv import writer
import datetime
log_file = '/Users/kiya/Desktop/mysql/ipscan/ip.txt'
output_file = '/Users/kiya/Desktop/mysql/ipscan/output.csv'
try:
ip_file =sys.argv[1]
except Exception:
print("usage: pythone3 {} [ip file]".format(sys.argv[0]))
sys.exit()
name_to_check = 'MBX_AUTHENTICATION_FAILED'
with open(log_file,encoding="utf-8") as infile:
for line in infile:
if name_to_check in line:
username = re.search(r'(?<=userName=)(.*)(?=,)', line)
username = username.group()
ip = re.search(r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])',line)
ip = ip.group()
with open(output_file, 'a') as outfile:
outfile.write('{username},{ip}\n'.format(username=username, ip=ip))

try this it work well and if the problem persist check your search regex :
from sys import argv
log_file = ""
if len(argv) > 0 :
log_file = argv[1]
else :
quit("No log_file specified, exiting script.")
with open(log_file, encoding="utf-8") as infile:
for line in infile:
if name_to_check in line:
username = re.search(r'(?<=userName=)(.*)(?=,)', line)
username = username.group()
date = re.search(r'(?P<date>\d{8})\s+(?P<time>\d{9})\+(?P<zone>\d{4})', line)
date = datetime.datetime.strptime(date.group('date'), "%Y%m%d").strftime("%Y-%m-%d")
print(date)
time = re.search(r'(?P<date>\d{8})\s+(?P<time>\d{9})\+(?P<zone>\d{4})', line)
time = datetime.datetime.strptime(time.group('time'), "%H%M%S%f").strftime("%H:%M:%S")
print(time)
ip = re.search(r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])',line)
with open(output_file, "ab", buffering=0) as outfile:
outfile.write( ("{},{},{},{}\n".format(username, date, time, ip)).encode() )

Related

skip code if the file already exists in output directory (outputfile)

check output directory(outputfile) before running the code to check the file already in there!
import os
texttofind ='abcd'
texttoreplace ='wxyz'
sourcepath = os.listdir('inputfiles/')
for file in sourcepath:
inputfile = 'inputfiles/'+ file
print('conversion is ongoing for:' +inputfile)
with open(inputfile,'r') as inputfile:
filedata = inputfile.read()
freq = 0
freq = filedata.count(texttofind)
destinationpath = 'outputfile/' + file
filedata = filedata.replace(texttofind,texttoreplace)
with open(destinationpath,'w') as file:
file.write(filedata)
print ('total %d Record replaced %freq')

Something like this?
import os
texttofind ='abcd'
texttoreplace ='wxyz'
sourcepath = os.listdir('inputfiles/')
for file in sourcepath:
destinationpath = 'outputfile/' + file
if not os.path.isfile(destinationpath):
inputfile = 'inputfiles/'+ file
print('conversion is ongoing for:' +inputfile)
with open(inputfile,'r') as inputfile:
filedata = inputfile.read()
freq = 0
freq = filedata.count(texttofind)
filedata = filedata.replace(texttofind,texttoreplace)
with open(destinationpath,'w') as file:
file.write(filedata)
print ('total %d Record replaced %freq')

Multi threading not executing Tasks simultaneously

I wrote a python code for my project that stores file name of files on the SFTP server in a list and does the same for local directory and compares the 2 lists and then download the uncommon files. code takes its server credentials from a separate text file of format
CONTENTS OF TEXT FILE (config9.txt)
127.0.0.1,username,password,log22.txt,/new45,C:\Users\udit\Desktop\ftp2\bob16
my program executed one by one for different IP's. I wanted it to run it parallely for all IP's using multithreading. I modified my code but it still executes it one by one. Whats the error in it?
import os
import pysftp
import csv
import socket
from stat import S_IMODE, S_ISDIR, S_ISREG
import time
import threading
from threading import Thread
from time import sleep
import os.path
import shutil
import fileinput
import lock
cnopts = pysftp.CnOpts()
cnopts.hostkeys = None
def mainprocess():
with open("config9.txt", "r") as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
for row in csv_reader:
print(row)
IP=row[0]
myUsername=row[1]
myPassword=row[2]
txtfile=row[3]
remotepath=row[4]
localpath=row[5]
with pysftp.Connection(host=IP, username=myUsername, password=myPassword, cnopts=cnopts) as sftp:
r = str(socket.gethostbyaddr(IP))
print("connection successful with " + r)
def get_file2(sftp, remotedir):
result = []
for entry in sftp.listdir_attr(remotedir):
remotepath = remotedir + "/" + entry.filename
mode = entry.st_mode
if S_ISDIR(mode):
result += get_file2(sftp, remotepath)
elif S_ISREG(mode):
if (time.time() - entry.st_mtime) // (24 * 3600) > 0:
result.append(entry.filename)
return result
remote_path = remotepath
d = get_file2(sftp, remote_path)
def process():
myName = []
with open(filename, 'r+') as f:
for name in f.readlines():
while '\n' in name:
name = name.replace('\n', '')
myName.append(name)
print(myName)
print(len(myName))
import os
filtered_list = [string for string in d if string not in myName]
print("filtered list:", filtered_list)
print(len(filtered_list))
local_path =localpath
def compare_files(sftp, remotedir, remotefile, localdir, preserve_mtime=True):
remotepath = remotedir + "/" + remotefile
localpath = os.path.join(localdir, remotefile)
mode = sftp.stat(remotepath).st_mode
if S_ISDIR(mode):
try:
os.mkdir(localpath, mode=777)
except OSError:
pass
compare_files(sftp, remotepath, localpath, preserve_mtime)
elif S_ISREG(mode):
sftp.get(remotepath, localpath, preserve_mtime=True)
for file in filtered_list:
compare_files(sftp, remote_path, file, local_path, preserve_mtime=False)
with open(filename, 'a') as f:
for item in filtered_list:
f.write("%s\n" % item)
filename=txtfile
try:
file = open(filename, 'r')
process()
except IOError:
file = open(filename, 'w')
process()
t=time.time()
t1=threading.Thread(target=mainprocess(),args=())
t1.start()
t1.join()

Search string and replace value

Here is my code,
import os, os.path
import collections
import sys
import re
DIR_DAT = "dat"
DIR_OUTPUT = "output"
filenames = []
data = []
#in case if output folder doesn't exist
if not os.path.exists(DIR_OUTPUT):
os.makedirs(DIR_OUTPUT)
input_file = 'axcfgpasww-from-server.dat'
element = sys.argv[1]
output_value = sys.argv[2]
with open(input_file) as infile, open('axcfgpasww-modified.dat', "w") as outfile:
if element in open(input_file).read():
regex = re.findall("\s*([\S\s]+)", element)
outfile.write(regex[0])
print(regex[0])
else:
print('No match found')
The input_file :
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=2590552
I execute my script this way : modify_file.py NUM_COMM:nNN0.7 Hello world !
So if NUM_COMM:nNN0.7 exists in the file, it writes "NUM_COMM:nNN0.7" in a new axcfgpasww-modified.dat file.
But what I want to do, is execute my command as written above. And the result is the input file, with only the new value.
So my output file would be :
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=Hello world !
Can anyone help me on this ?
Thanks !

I have made some refactoring to your original code, and made it produce the output you seek,
import os, os.path
import collections
import sys
import re
DIR_DAT = "dat"
DIR_OUTPUT = "output"
filenames = []
data = []
found = False
#in case if output folder doesn't exist
if not os.path.exists(DIR_OUTPUT):
os.makedirs(DIR_OUTPUT)
input_file = 'axcfgpasww-from-server.dat'
element = sys.argv[1]
output_value = sys.argv[2]
with open(input_file) as infile:
for line in infile.readlines():
if element in line:
old_value = line.split("=")[1]
data.append(line.replace(old_value, output_value))
found = True
else:
data.append(line)
if not found:
print('No match found')
with open(input_file, 'w') as outfile:
for line in data:
outfile.write(line)
output:
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=Hello World!
Hope this helps

stdin stdout python: how to reuse the same input file twice?

I am quite new to Python and even newer to stdin stdout method. Nevertheless I need to make my script usable for UNIX commands, in order to make it possible for example to process 2 input files at once with my script.
This script works perfectly well with command line arguments:
newlist = []
def f1()
....
def f2(input_file):
vol_id = sys.argv[3]
for line in input_file:
if ... :
line = line.replace('abc','def')
line = line.replace('id', 'id'+vol_id)
....
newlist.append(line)
return newlist
def main():
if len(sys.argv) < 4:
print 'usage: ./myscript.py [file_in... file_out... volume_id]'
sys.exit(1)
else:
filename = sys.argv[1]
filename_out = sys.argv[2]
tree = etree.parse(filename)
extract(tree)
input_file = open(filename, 'rU')
change_class(input_file)
file_new = open(filename_out, 'w')
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
When I tried to add stdin stdout to it, I first had a problem with reading the same input file first, and for this reason made some chages so that it would be actually open only once. Here is my modified main():
filename = sys.argv[1]
filename_out = sys.argv[2]
if filename == '-':
filename = sys.stdin
else:
input_file = open(filename, 'rU')
if filename_out == '-':
filename_out = sys.stdout
file_new = filename_out
else:
file_new = open(filename_out, 'w')
input_file = open(filename, 'rU')
tree = etree.fromstring(input_file)
extract(tree)
change_class(input_file)
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
Then I ran my script like this:
./myscript.py - - volumeid < inputfile > outputfile
And I got this error message:
Traceback (most recent call last):
File "./myscript.py", line 191, in <module>
main()
File "./myscript.py", line 175, in main
input_file = open(filename, 'rU')
TypeError: coercing to Unicode: need string or buffer, file found
What am I doing wrong?

You are trying to use an open file object as a filename:
filename = sys.stdin
# ...
input_file = open(filename, 'rU')
You cannot re-read from sys.stdin anyway; you need to read all of the file into memory, then process it twice:
if filename == '-':
input_file = sys.stdin
else:
input_file = open(filename, 'rU')
input_data = input_file.read()
tree = etree.fromstring(input_data)
extract(tree)
change_class(input_data)
mwhere you'll have to alter change_class to handle a string, not an open file object.

python parse file

I have a file with username and emails, in this format :
pete,pbellyer#gmail.com
I want to only keep the email, so i thought about using a regex like this :
import re,sys
Mailfile = sys.argv[1]
file = open(Mailfile, "r")
for MAIL in file.readlines():
tmp = re.split("\n+", MAIL)
m = re.match( ',(.+)', MAIL)
m.group(0)
But then I don't know how to store the result in a file.
I always get the last email address in the new file.
What would be the best way to store the results in a file ?
Thanks!

import sys
infile, outfile = sys.argv[1], sys.argv[2]
with open(infile) as inf, open(outfile,"w") as outf:
line_words = (line.split(',') for line in inf)
outf.writelines(words[1].strip() + '\n' for words in line_words if len(words)>1)

You could use the csv module (since your data looks comma-separated, at least in your example):
import sys
import csv
with open('mail_addresses.txt', 'w') as outfile:
for row in csv.reader(open(sys.argv[1], 'rb')):
outfile.write("%s\n" % row[1])

Try something like this:
import sys
Mailfile = sys.argv[1]
Outfile = sys.argv[2]
try:
in_file = open(Mailfile, 'r')
out_file = open(Outfile, 'a')
for mail in in_file.readlines():
address = mail.split(',')[1].strip()
out_file.write(address+',') #if you want to use commas to seperate the files, else use something like \n to write a new line.
finally:
in_file.close()
out_file.close()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How can I execute 1TB log file python script in command line - python

Related

skip code if the file already exists in output directory (outputfile)

Multi threading not executing Tasks simultaneously

Search string and replace value

stdin stdout python: how to reuse the same input file twice?

python parse file

Categories

Resources