separate line output by groups

separate line output by groups - python

My python script checks mysqldump and if any problems script prints :
Dump is old for db;
Dump is not complete for db;
Dump is empty for db;
MySQL dump does not exist for db;
Script logs these records to the file line by line.
My question is there are a way to format output in the file like:
Dump is old for db;
Dump is old for db;
Dump is old for db;
Dump is not complete for db;
Dump is not complete for db;
Dump is not complete for db;
Dump is empty for db;
Dump is empty for db;
Dump is empty for db;
Because now my file looks like:
Dump is old for db;
Dump is empty for db;
Dump is old for db;
MySQL dump does not exist for db;
...
etc
Here my small script :)
#!/bin/env python
import psycopg2
import sys,os
from subprocess import Popen, PIPE
from datetime import datetime
import smtplib
con = None
today = datetime.now().strftime("%Y-%m-%d")
log_dump_fail = '/tmp/mysqldump_FAIL'
log_fail = open(log_dump_fail,'w').close()
log_fail = open(log_dump_fail, 'a')
sender = 'PUT_SENDER_NAME_HERE'
receiver = ['receiver_name']
smtp_daemon_host = 'localhost'
def db_backup_file_does_not_exist(db_backup_file):
if not os.path.exists(db_backup_file): return True
else: return False
def dump_health(last_dump_row, file_name,db):
last_row = last_dump_row.rsplit(" ")
tms = ''.join(last_row[4:5])
status = last_row[1:3]
if (status) and (tms != today):
log_fail.write("\nDB is old for "+ str(db) + str(file_name) + ", \nDump finished at " + str(''.join(tms)))
log_fail.write("\n-------------------------------------------")
elif not (status) and (tms == None):
log_fail.write("\nDump is not complete for "+str(db) + str(file_name) + " , end of file is not correct")
log_fail.write("\n-------------------------------------------")
suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
def humansize(nbytes):
if nbytes == 0: return '0 B'
i = 0
while nbytes >= 1024 and i < len(suffixes)-1:
nbytes /= 1024.
i += 1
f = ('%.2f' % nbytes).rstrip('0').rstrip('.')
return '%s %s' % (f, suffixes[i])
def dump_size(dump_file, file_name,db):
size = os.path.getsize(dump_file)
if (size < 1024):
human_readable = humansize(size)
log_fail.write("\nDump is empty for " +str(db) + "\n" +"\t" + str (file_name)+", file size is " + str(human_readable))
log_fail.write("\n-------------------------------------------")
def report_to_noc(isubject,text):
TEXT = text
SUBJECT = subject
message = 'Subject: %s\n\n%s' % (SUBJECT, TEXT)
server = smtplib.SMTP(smtp_daemon_host)
server.sendmail(sender, receiver, message)
server.quit()
try:
con = psycopg2.connect(database='**', user='***', password='***', host='****')
cur = con.cursor()
cur.execute("""\
select ad.servicename, (select name from servers where id = ps.server_id) as servername
from packages as p, account_data as ad, package_servers as ps
where p.id=ad.package_id and
p.date_deleted IS NULL and
p.id=ps.package_id and
p.aktuel IS NULL and
p.pre_def_package_id = 4 and
p.mother_package_id !=0 and
ps.subservice_id=5 and
p.mother_package_id NOT IN (select id from packages where date_deleted IS NOT NULL)
ORDER BY servername;
""")
while (1):
row = cur.fetchone ()
if row == None:
break
db = row[0]
server_name = str(row[1])
if (''.join(server_name) == 'SKIP_THIS') or (''.join(server_name) == 'SKIP_THIS'):
continue
else:
db_backup_file = '/storage/backup/db/mysql/' + str(db) + '/current/' + str(db) + '.mysql.gz'
db_backup_file2 = '/storage/backup/' + str(''.join(server_name.split("DB"))) + '/mysql/' + str(db) + '/current/'+ str(db) + '.mysql.gz'
db_file_does_not_exist = False
db_file2_does_not_exist = False
if db_backup_file_does_not_exist(db_backup_file):
db_file_does_not_exist = True
if db_backup_file_does_not_exist(db_backup_file2):
db_file2_does_not_exist = True
if db_file_does_not_exist and db_file2_does_not_exist:
log_fail.write("\nMySQL dump does not exist for " + str(db) + "\n" + "\t" + str(db_backup_file2) + "\n" + "\t" + str(db_backup_file))
log_fail.write("\n-------------------------------------------")
continue
elif (db_file_does_not_exist) and not (db_file2_does_not_exist):
p_zcat = Popen(["zcat", db_backup_file2], stdout=PIPE)
p_tail = Popen(["tail", "-2"], stdin=p_zcat.stdout, stdout=PIPE)
dump_status = str(p_tail.communicate()[0])
dump_health(dump_status,db_backup_file2,db)
dump_size(db_backup_file2, db_backup_file2,db)
elif (db_file2_does_not_exist) and not (db_file_does_not_exist):
p_zcat = Popen(["zcat", db_backup_file], stdout=PIPE)
p_tail = Popen(["tail", "-2"], stdin=p_zcat.stdout, stdout=PIPE)
dump_status = str(p_tail.communicate()[0])
dump_health(dump_status,db_backup_file,db)
dump_size(db_backup_file,db_backup_file,db)
con.close()
except psycopg2.DatabaseError, e:
print 'Error %s' % e
sys.exit(1)
log_fail.close()
if os.path.getsize(log_dump_fail) > 0:
subject = "Not all MySQL dumps completed successfully. Log file backup:" + str(log_dump_fail)
fh = open(log_dump_fail, 'r')
text = fh.read()
fh.close()
report_to_noc(subject,text)
else:
subject = "MySQL dump completed successfullyi for all DBs, listed in PC"
text = "Hello! \nI am notifying you that I checked mysqldump files this morning.\nThere are nothing to worry about. :)"
report_to_noc(subject,text)

You can process your log file after it has been written.
One option is to read your file and sort the lines:
lines = open('log.txt').readlines()
lines.sort()
open('log_sorted.txt', 'w').write("\n".join(lines))
This won't emit an empty line between log types.
Another option is to use a Counter:
from collections import Counter
lines = open('log.txt').readlines()
counter = Counter()
for line in lines:
counter[line] += 1
out_file = open('log_sorted.txt', 'w')
for line, num in counter.iteritems():
out_file.write(line * num + "\n")

Looks like you want to group the output of the script, rather than log the info as it comes while searching.
Easiest would be to maintain 4 lists, on each for empty, not empty and so on. In the script add the db names to appropriate list instead of logging, and then dump the lists one by one into the file with appropriate prefixes("not empty for" + dbname).
For example, remove all the log_fail.write() from the functions and replace them with list.append() and write a separate function that writes to the log file as you like:
Add lists:
db_dump_is_old_list = []
db_dump_is_empty_list = []
db_dump_is_not_complete_list = []
db_dump_does_not_exist_list = []
Modify the Functions:
def dump_health(last_dump_row, file_name,db):
last_row = last_dump_row.rsplit(" ")
tms = ''.join(last_row[4:5])
status = last_row[1:3]
if (status) and (tms != today):
db_dump_is_old_list.append(str(db))
#log_fail.write("\nDB is old for "+ str(db) + str(file_name) + ", \nDump finished at " + str(''.join(tms)))
#log_fail.write("\n-------------------------------------------")
elif not (status) and (tms == None):
db_dump_is_not_complete_list.append(str(db)
#log_fail.write("\nDump is not complete for "+str(db) + str(file_name) + " , end of file is not correct")
#log_fail.write("\n-------------------------------------------")
def dump_size(dump_file, file_name,db):
size = os.path.getsize(dump_file)
if (size < 1024):
human_readable = humansize(size)
db_dump_is_empty_list.append(str(db))
#log_fail.write("\nDump is empty for " +str(db) + "\n" +"\t" + str (file_name)+", file size is " + str(human_readable))
#log_fail.write("\n-------------------------------------------")
if db_file_does_not_exist and db_file2_does_not_exist:
db_dump_does_not_exist_list.append(str(db))
#log_fail.write("\nMySQL dump does not exist for " + str(db) + "\n" + "\t" + str(db_backup_file2) + "\n" + "\t" + str(db_backup_file))
#log_fail.write("\n-------------------------------------------")
continue
And add a logger function:
def dump_info_to_log_file():
log_dump_fail = '/tmp/mysqldump_FAIL'
log_fail = open(log_dump_fail,'w').close()
log_fail = open(log_dump_fail, 'a')
for dbname in db_dump_is_old_list:
log_fail.write("Dump is Old for" + str(dbname))
log_fail.write("\n\n")
for dbname in db_dump_is_empty_list:
log_fail.write("Dump is Empty for" + str(dbname))
log_fail.write("\n\n")
for dbname in db_dump_is_not_complete_list:
log_fail.write("Dump is Not Complete for" + str(dbname))
log_fail.write("\n\n")
for dbname in db_dump_does_not_exist_list:
log_fail.write("Dump Does Not Exist for" + str(dbname))
log_fail.close()
Or you could simply log as you are doing, and then read in the file, sort and write back the file.

Thank you all for all interesting ideas.
I have really tried all options :)
To my mind:
With Counter object the pros is to few lines of code.
But cons are - many read\write operations. Log file is not big, however, I decided to decrease read(s) \ write(s)
With array the cons are to many lines of code :) but the pros is - write to the file only once.
So I implemented arrays.. :)
Thank you guys!!!

Related

How to run an .sql file full of commands as a command line argument to a python program?

I have a .sql file full of commands that I want to run through my python program. When I enter each line into the terminal individually, my program works as expected, as it has methods of parsing through the individual line entry to do what it needs to do. However, when I run the program (pa_2.py) by typing it into the terminal as such:
python3 pa_2.py < PA2_test.sql
the program doesn't read each line correctly and malfunctions, my guess being that it is unable to parse through the sql file correctly. The expected output is:
-- Database CS457_PA2 created.
-- Using database CS457_PA2.
-- Table Product created.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- pid int|name varchar(20)|price float
-- 1|Gizmo|19.99
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- 4|MultiTouch|199.99
-- 5|SuperGizmo|49.99
-- 1 record modified.
-- 2 records modified.
-- pid int|name varchar(20)|price float
-- 1|Gizmo|14.99
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- 4|MultiTouch|199.99
-- 5|Gizmo|14.99
-- 2 records deleted.
-- 1 record deleted.
-- pid int|name varchar(20)|price float
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- name varchar(20)|price float
-- SingleTouch|149.99
When I type each command as an individual line after typing in the terminal:
python3 pa_2.py
I get the expected output. However, when I run the script in the command line as:
python3 pa_2.py < PA2_test.sql
the output I get is:
created.CS457_PA2
because it does not exist.7_PA2
Created table Product.
1 new record inserted.
1 new record inserted.
1 new record inserted.
1 new record inserted.
1 new record inserted.
because it does not exist.uct
0 records modified.
0 records modified.
because it does not exist.uct
0 records modified.
Traceback (most recent call last):
File "/Users/isaac_reilly/Desktop/College/UNR/Classes/Year 3 2022-2023/Semester 1 2022/CS 457 Database Managemant Systems/Project 3/pa_2.py", line 79, in <module>
tablefunctions.deleteData(user_input, currentdb)
File "/Users/isaac_reilly/Desktop/College/UNR/Classes/Year 3 2022-2023/Semester 1 2022/CS 457 Database Managemant Systems/Project 3/tablefunctions.py", line 114, in deleteData
if float(splitter[4]) > float(searchText):
ValueError: could not convert string to float: '19.99)'
I want it to know that the end of each line is a semicolon ";". How would I use PA2_test.sql as a command line argument and run each line as expected? Below is the .sql file, as well as the rest of my program.
PA2_test.sql:
CREATE DATABASE CS457_PA2;
USE CS457_PA2;
CREATE TABLE Product (pid int, name varchar(20), price float);
INSERT INTO Product values(1, 'Gizmo', 19.99);
INSERT INTO Product values(2, 'PowerGizmo', 29.99);
INSERT INTO Product values(3, 'SingleTouch', 149.99);
INSERT INTO Product values(4, 'MultiTouch', 199.99);
INSERT INTO Product values(5, 'SuperGizmo', 49.99);
SELECT * FROM Product;
UPDATE Product set name = 'Gizmo' where name = 'SuperGizmo';
UPDATE Product set price = 14.99 where name = 'Gizmo';
SELECT * FROM Product;
DELETE FROM Product where name = 'Gizmo';
DELETE FROM Product where price > 150;
SELECT * FROM Product;
SELECT name, price FROM Product where pid != 2;
.EXIT
pa_2.py:
import fileinput
import sys
import dbfunctions
import tablefunctions
import selections
currentdb = None
user_input = None
TableList = [None]
#Loop continously prompts the terminal for an input from the user and then decides what to do based on input.
while (user_input != ".EXIT"):
user_input = input()
#print(user_input)
#States that all commands must end with a ';' if user types invalid command
if ";" not in user_input and user_input != ".EXIT":
print("Invalid command, all commands must end with ';'")
#Creates a database
if "CREATE DATABASE" in user_input:
dbName = dbfunctions.inputCleaner("CREATE DATABASE ", user_input)
dbfunctions.create_db(dbName)
#Deletes a database
if "DROP DATABASE" in user_input:
dbName = dbfunctions.inputCleaner("DROP DATABASE ", user_input)
dbfunctions.remove_db(dbName)
#Creates a table using attributes inputted by user
if "CREATE TABLE" in user_input:
tInput = dbfunctions.inputCleaner("CREATE TABLE ", user_input)
tableName = tInput.split()[0]
tablefunctions.createTable(tInput, tableName, currentdb)
#Deletes a table
if "DROP TABLE" in user_input:
tableName = dbfunctions.inputCleaner("DROP TABLE ", user_input)
tablefunctions.dropTable(tableName, currentdb)
#Modifies a table using attributes inputted by the user
if "ALTER TABLE" in user_input:
rawInput = dbfunctions.inputCleaner("ALTER TABLE ", user_input)
tablefunctions.alterTable(rawInput, currentdb)
#Sets current working database
if "USE" in user_input:
dbName = dbfunctions.inputCleaner("USE ", user_input)
#print(dbName)
currentdb = dbName
dbfunctions.finddb(currentdb)
#print("Using database " + currentdb)
#elif dbfunctions.finddb(currentdb) == 0:
#print("Unable to use database " + dbName + " because it does not exist.")
#Selects data from a user specified table and prints contents to terminal
if "SELECT" in user_input:
selections.selectSpecified(user_input, currentdb)
#Inserts given data into a specified table
if "INSERT INTO" in user_input:
dataInput = dbfunctions.inputCleaner("INSERT INTO ", user_input)
tableName = dataInput.split()[0]
tablefunctions.insertData(dataInput, tableName, currentdb)
#Changes data in table as specified
if "UPDATE" in user_input:
tablefunctions.updateData(user_input, currentdb)
#Deletes data from table as specified
if "DELETE FROM" in user_input:
tablefunctions.deleteData(user_input, currentdb)
dbfunctions.py:
import os
import subprocess
import shlex
import shutil
#Removes semicolon and given phrase from input
def inputCleaner(removePhrase, input):
cleaned = input.replace(";", "")
return cleaned.replace(removePhrase, "")
#Function used to create specified database (local directory)
def create_db(dbName):
try:
#Tries making directory
os.makedirs(dbName)
print("Database " + dbName + " created.")
except FileExistsError:
#Checks to see if directory already exists, throws exception if it does
print("!Failed to create database " + dbName + " because it already exists.")
#Function used to remove specified database (local directory)
def remove_db(dbName):
#Checks to see if specified directory exists and deletes if it does
if os.path.exists(dbName):
shutil.rmtree(dbName)
print("Database " + dbName + " deleted.")
#If selected directory does not exists, prints an error message to the screen
else:
print("!Failed to delete " + dbName + " because it does not exist.")
#Checks to make sure that specified database exists
def finddb(dbName):
if dbName in subprocess.run(['ls', '|', 'grep', dbName], capture_output = True, text = True).stdout:
print("Using database " + dbName)
else:
print("Unable to use database", dbName,"because it does not exist.")
def getOperand(op):
operand = None
if (op == '='):
operand = 0
elif (op == '!='):
operand = -3
elif (op == '<'):
operand = -1
elif (op == '>'):
operand = 1
return operand
tablefunctions.py:
import subprocess
import os
#Checks to make sure that specified table exists
def findtable(tableName, currentdb):
if tableName in subprocess.run(['ls', currentdb, '|', 'grep', tableName], capture_output = True, text = True).stdout:
return 1
else:
return 0
#Creates table with specified headers
def createTable(dataInput, tableName, currentdb):
unformattedAttributes = dataInput.replace(tableName, "")
tableAttributes1 = unformattedAttributes[2:]
tableAttributes2 = tableAttributes1[:-1]
formattedAttributes = tableAttributes2.split(",")
if (currentdb != None):
if findtable(tableName, currentdb) == 0:
os.system(f'touch {currentdb}/{tableName}.txt')
filename = currentdb + '/' + tableName + '.txt'
fedit = open(filename, 'w')
fedit.write(" |".join(formattedAttributes))
fedit.close()
print(f"Created table {tableName}.")
else:
print("!Failed to create table " + tableName + " because it already exists.")
else:
print("Please specify which database to use.")
#Deletes specified table
def dropTable(tableName, currentdb):
if (currentdb != None):
if findtable(tableName, currentdb) != 0:
os.system(f'rm {currentdb}/{tableName}.txt')
print("Table " + tableName + " deleted.")
else:
print("!Failed to delete " + tableName + " because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
#Inserts data into specified table
def insertData(dataInput, tableName, currentdb):
unformattedInput = dataInput.replace(tableName, "")
cleanedInput1 = unformattedInput.replace("'", "")
cleanedInput2 = cleanedInput1.replace(" ", "")
unformattedAttributes = cleanedInput2[7:-1]
formattedAttributes = unformattedAttributes.split(",")
if (currentdb != None):
if findtable(tableName, currentdb):
fedit = open(f'{currentdb}/{tableName}.txt', 'a')
fedit.write("\n" + " | ".join(formattedAttributes))
fedit.close()
print("1 new record inserted.")
else:
print("!Failed to insert data into " + tableName + " because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
#Modifies a table using attributes inputted by the user
def alterTable(rawInput, currentdb):
tableName = rawInput.split()[0]
alterCmd = rawInput.split()[1]
alterAttribute1 = rawInput.replace(tableName, "")
alterAttribute2 = alterAttribute1.replace(alterCmd, "")
newAttr = alterAttribute2[2:]
if (currentdb != None):
if findtable(tableName, currentdb):
fedit = open(f'{currentdb}/{tableName}.txt', 'a')
fedit.write(f" | {newAttr}")
fedit.close()
print("Table " + tableName + " modified.")
else:
print("!Failed to modify " + tableName + " because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
#Removes data from specified table
def deleteData(user_input, currentdb):
if (currentdb != None):
cleanedInput1 = user_input[12:-1]
cleanedInput2 = cleanedInput1.replace("'", "")#Cleans input
tableName = cleanedInput2.split()[0]
if findtable(tableName, currentdb) != 0:
replaceText = ""
searchText = cleanedInput2.split()[4]
searchCategory = cleanedInput2.split()[2]
with open(f'{currentdb}/{tableName}.txt', 'r') as file:
count = 0
replacement = ""
if cleanedInput2.split()[3] == "=":
#Loops line by line for keywords
for line in file:
line = line.strip()
splitter = line.split()#Puts line into list elements
if searchText == splitter[2]: #If elements matches search text
updatedLine = "" #delete it
count += 1 #Keeps track of number of edits
else:
updatedLine = line + "\n" #Reads line unchanged if specified data is not present
replacement = replacement + updatedLine
if cleanedInput2.split()[3] == ">":
lineCount = 0
for line in file:
line = line.strip()
splitter = line.split()
if lineCount == 0:
lineCount += 1
updatedLine = line + "\n"
else:
if float(splitter[4]) > float(searchText):
updatedLine = ""
count += 1
else:
updatedLine = line + "\n"
replacement = replacement + updatedLine
if cleanedInput2.split()[3] == "<":
lineCount = 0
for line in file:
line = line.strip()
splitter = line.split()
if lineCount == 0:
lineCount += 1
updatedLine = line + "\n"
else:
if float(splitter[4]) < float(searchText):
updatedLine = ""
count += 1
else:
updatedLine = line + "\n"
replacement = replacement + updatedLine
file.close()
with open(f'{currentdb}/{tableName}.txt', 'w') as file:
file.write(replacement)
file.close()
if count == 1:
print(str(count) + " record modified.")
else:
print(str(count) + " records modified.")
else:
print("!Failed to update " + tableName + " table because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
def updateData(user_input,currentdb):
if (currentdb != None):
cleanedInput1 = user_input[7:-1]
cleanedInput2 = cleanedInput1.replace("'", "")
tableName = cleanedInput2.split()[0]
if findtable(tableName, currentdb) != 0:
replaceText = cleanedInput2.split()[4]
searchText = cleanedInput2.split()[8]
replaceCategory = cleanedInput2.split()[2]
searchCategory = cleanedInput2.split()[6]
with open(f'{currentdb}/{tableName}.txt', 'r') as file:
count = 0
replacement = ""
if (replaceCategory == searchCategory):#if both columns being referenced are the same
for line in file:
line = line.strip()
if searchText in line:
updatedLine = line.replace(searchText, replaceText)
count += 1
else:
updatedLine = line
replacement = replacement + updatedLine + "\n"
else:
for line in file:
splitter = line.split()
splitter[4] = replaceText
line = line.strip()
if searchText == splitter[2]:
updatedLine = " ".join(splitter)
count += 1
else:
updatedLine = line
replacement = replacement + updatedLine + "\n"
file.close()
with open(f'{currentdb}/{tableName}.txt', 'w') as file:
file.write(replacement)
file.close()
if count == 1:
print(str(count) + " record modified.")
else:
print(str(count) + " records modified.")
else:
print("!Failed to update " + tableName + " table because it does not exist.")
else:
print("No specified database, enter 'USE <database_name>;'")
selections.py:
import tablefunctions
import dbfunctions
def selectAll(tableName, currentdb):
if currentdb == None:
print("No specified database, enter 'USE <database_name>;'")
else:
if tablefunctions.findtable(tableName, currentdb):
fedit = open(f'{currentdb}/{tableName}.txt', 'r')
print(fedit.read())
fedit.close()
else:
print("!Failed to query table " + tableName + " because it does not exist.")
def selectSpecified(user_input, currentdb):
if "SELECT * FROM" in user_input:
tableName = dbfunctions.inputCleaner("SELECT * FROM ", user_input)
selectAll(tableName, currentdb)
else:
if "SELECT" in user_input:
selLower = user_input[7:-1]
selection = user_input[7:-1]
elif "select" in user_input:
selection = user_input[7:-1]
# Gathering list of variables
selectColumns = selection.replace(",", "").split()
selectColumns = selectColumns[:selectColumns.index("FROM")]
# Table name
tableName = selection.split()[len(selectColumns)+1]
# Gathering what to filter by
whereColumn = selection.split()[len(selectColumns)+3]
whereRecord = selection.split()[len(selectColumns)+5]
operand = dbfunctions.getOperand(selection.split()[len(selectColumns)+4])
if currentdb != None:
if tablefunctions.findtable(tableName, currentdb):
f = open(f'{currentdb}/{tableName}.txt', 'r')
file = f.readlines()
f.close()
selectColumnNums = []
columnNameString = ""
listToReturn = []
count = 0
for line in file:
if (count == 0): # Headers
# Finding the indexes of select and where columns
columnList = line.split()
columnListWithTypes = columnList.copy()
del columnListWithTypes[2::3]
del columnList[1::3]
columnCount = 0
# If variable is found in table, record its index
for word in columnList:
if word in selectColumns:
selectColumnNums.append(columnCount)
if (word == whereColumn):
whereColumnNum = columnCount
columnCount += 1
# Creating a custom table header for the selected columns
for index in selectColumnNums:
columnNameString += f"{columnListWithTypes[index]} {columnListWithTypes[index+1]} | "
queryHeader = columnNameString[:-3]
listToReturn.append(queryHeader)
if (count > 0): # Values
tupleDetails = line.split()
# Determines what to do with each row
def querySpecificHelper():
# Creates the row output
def queryStringMaker():
queryString = ""
for index in selectColumnNums:
queryString += f"{tupleDetails[index]} | "
queryResult = queryString[:-3]
listToReturn.append(queryResult)
if (operand == 0): # Equality
# The type checking here handles strings and numbers separately
# Ex. 150 or 150.00 would not find 150.00 or 150, respectively
if (type(tupleDetails[whereColumnNum]) is str):
if (tupleDetails[whereColumnNum] == whereRecord):
queryStringMaker()
elif (type(tupleDetails[whereColumnNum]) is not str):
if (float(tupleDetails[whereColumnNum]) == float(whereRecord)):
queryStringMaker()
elif (operand == 1): # Greater than
if (float(tupleDetails[whereColumnNum]) > float(whereRecord)):
queryStringMaker()
elif (operand == -1): # Less than
if (float(tupleDetails[whereColumnNum]) < float(whereRecord)):
queryStringMaker()
elif (operand == -3): # Inequality
if (type(tupleDetails[whereColumnNum]) is str):
if (tupleDetails[whereColumnNum] != whereRecord):
queryStringMaker()
elif (type(tupleDetails[whereColumnNum]) is not str):
if (float(tupleDetails[whereColumnNum]) != float(whereRecord)):
queryStringMaker()
querySpecificHelper()
count += 1
for line in listToReturn: # Prints table
print(line)
else:
print(f"Could not query table {tableName} because it does not exist.")
else:
print("Please specify which database to use.")

How to read .evtx file using python?

Guys do anyone know how to read event log file in C:\Windows\System32\winevt\Logs with .evtx extension?
I have already tried to open it using notepad and read using python but notepad says access is denied...
Do anyone know how to do it? Thanks in advance..

This is how you would read the file "Forwarded Events" from the event viewer. You need admin access so I would run it as admin but I it will prompt you for a password if you don't.
import win32evtlog
import xml.etree.ElementTree as ET
import ctypes
import sys
def is_admin():
try:
return ctypes.windll.shell32.IsUserAnAdmin()
except:
return False
if is_admin():
# open event file
query_handle = win32evtlog.EvtQuery(
'C:\Windows\System32\winevt\Logs\ForwardedEvents.evtx',
win32evtlog.EvtQueryFilePath)
read_count = 0
a = 1
while a == 1:
a += 1
# read 1 record(s)
events = win32evtlog.EvtNext(query_handle, 1)
read_count += len(events)
# if there is no record break the loop
if len(events) == 0:
break
for event in events:
xml_content = win32evtlog.EvtRender(event, win32evtlog.EvtRenderEventXml)
# parse xml content
xml = ET.fromstring(xml_content)
# xml namespace, root element has a xmlns definition, so we have to use the namespace
ns = '{http://schemas.microsoft.com/win/2004/08/events/event}'
substatus = xml[1][9].text
event_id = xml.find(f'.//{ns}EventID').text
computer = xml.find(f'.//{ns}Computer').text
channel = xml.find(f'.//{ns}Channel').text
execution = xml.find(f'.//{ns}Execution')
process_id = execution.get('ProcessID')
thread_id = execution.get('ThreadID')
time_created = xml.find(f'.//{ns}TimeCreated').get('SystemTime')
#data_name = xml.findall('.//EventData')
#substatus = data_name.get('Data')
#print(substatus)
event_data = f'Time: {time_created}, Computer: {computer}, Substatus: {substatus}, Event Id: {event_id}, Channel: {channel}, Process Id: {process_id}, Thread Id: {thread_id}'
print(event_data)
user_data = xml.find(f'.//{ns}UserData')
# user_data has possible any data
else:
ctypes.windll.shell32.ShellExecuteW(None, "runas", sys.executable, " ".join(sys.argv), None, 1)
input()

.evtx is the extension for Windows Eventlog files. It contains data in a special binary format designed by Microsoft so you cannot simply open it in a text editor.
The are open source tools to read .evtx and the NXLog EE can also read .evtx files. (Disclaimer: I'm affiliated with the latter).

I modified the accepted answer a bit as following, so it becomes reusable:
import xml.etree.ElementTree as Et
import win32evtlog
from collections import namedtuple
class EventLogParser:
def __init__(self, exported_log_file):
self.exported_log_file = exported_log_file
def get_all_events(self):
windows_events = []
query_handle = win32evtlog.EvtQuery(str(self.exported_log_file),
win32evtlog.EvtQueryFilePath | win32evtlog.EvtQueryReverseDirection)
while True:
raw_event_collection = win32evtlog.EvtNext(query_handle, 1)
if len(raw_event_collection) == 0:
break
for raw_event in raw_event_collection:
windows_events.append(self.parse_raw_event(raw_event))
return windows_events
def parse_raw_event(self, raw_event):
xml_content = win32evtlog.EvtRender(raw_event, win32evtlog.EvtRenderEventXml)
root = Et.fromstring(xml_content)
ns = "{" + root.tag.split('}')[0].strip('{') + "}"
system = root.find(f'{ns}System')
event_id = system.find(f'{ns}EventID').text
level = system.find(f'{ns}Level').text
time_created = system.find(f'{ns}TimeCreated').get('SystemTime')
computer = system.find(f'{ns}Computer').text
WindowsEvent = namedtuple('WindowsEvent',
'event_id, level, time_created, computer')
return WindowsEvent(event_id, level, time_created, computer)

I use the "python-evtx" library, you can install it using this command:
pip install python-evtx
In my case, I'm not interested in reading records with the "Information" level.
import os
import codecs
from lxml import etree
import Evtx.Evtx as evtx
def evtxFile(absolutePath, filenameWithExt, ext, _fromDate, _toDate):
print("Reading: " + filenameWithExt)
outText = ""
channel = ""
#read the windows event viewer log and convert its contents to XML
with codecs.open(tempFilePath, "a+", "utf-8", "ignore") as tempFile:
with evtx.Evtx(absolutePath) as log:
for record in log.records():
xmlLine = record.xml()
xmlLine = xmlLine.replace(" xmlns=\"http://schemas.microsoft.com/win/2004/08/events/event\"", "")
xmlParse = etree.XML(xmlLine)
level = parseXMLtoString(xmlParse, ".//Level/text()")
if not level == "0" and not level == "4":
providerName = parseXMLtoString(xmlParse, ".//Provider/#Name")
qualifiers = parseXMLtoString(xmlParse, ".//EventID/#Qualifiers")
timestamp = parseXMLtoString(xmlParse, ".//TimeCreated/#SystemTime")
eventID = parseXMLtoString(xmlParse, ".//EventID/text()")
task = parseXMLtoString(xmlParse, ".//Task/text()")
keywords = parseXMLtoString(xmlParse, ".//Keywords/text()")
eventRecordID = parseXMLtoString(xmlParse, ".//EventRecordID/text()")
channel = parseXMLtoString(xmlParse, ".//Channel/text()")
computer = parseXMLtoString(xmlParse, ".//Computer/text()")
message = parseXMLtoString(xmlParse, ".//Data/text()")
if level == "1":
level = "Critical"
elif level == "2":
level = "Error"
elif level == "3":
level = "Warning"
date = timestamp[0:10]
time = timestamp[11:19]
time = time.replace(".", "")
_date = datetime.strptime(date, "%Y-%m-%d").date()
if _fromDate <= _date <= _toDate:
message = message.replace("<string>", "")
message = message.replace("</string>", "")
message = message.replace("\r\n", " ")
message = message.replace("\n\r", " ")
message = message.replace("\n", " ")
message = message.replace("\r", " ")
outText = date + " " + time + "|" + level + "|" + message.strip() + "|" + task + "|" + computer + "|" + providerName + "|" + qualifiers + "|" + eventID + "|" + eventRecordID + "|" + keywords + "\n"
tempFile.writelines(outText)
with codecs.open(tempFilePath, "r", "utf-8", "ignore") as tempFile2:
myLinesFromDateRange = tempFile2.readlines()
#delete the temporary file that was created
os.remove(tempFilePath)
if len(myLinesFromDateRange) > 0:
createFolder("\\filtered_data_files\\")
outFilename = "windows_" + channel.lower() + "_event_viewer_logs" + ext
myLinesFromDateRange.sort()
#remove duplicate records from the list
myFinalLinesFromDateRange = list(set(myLinesFromDateRange))
myFinalLinesFromDateRange.sort()
with codecs.open(os.getcwd() + "\\filtered_data_files\\" + outFilename, "a+", "utf-8", "ignore") as linesFromDateRange:
linesFromDateRange.seek(0)
if len(linesFromDateRange.read(100)) > 0:
linesFromDateRange.writelines("\n")
linesFromDateRange.writelines(myFinalLinesFromDateRange)
del myLinesFromDateRange[:]
del myFinalLinesFromDateRange[:]
else:
print("No data was found within the specified date range.")
print("Closing: " + filenameWithExt)
I hope it helps you or someone else in the future.
EDIT:
The "tempFilePath" can be anything you want, for example:
tempFilePath = os.getcwd() + "\\tempFile.txt"
I collected some information first before calling the "evtxFile" function:
The "From" and the "To" dates are in the following format: YYYY-MM-DD
Converted the dates to "date" data type:
_fromDate = datetime.strptime(fromDate, "%Y-%m-%d").date()
_toDate = datetime.strptime(toDate, "%Y-%m-%d").date()
Divided the directory where the .evtx files are located into different parts:
def splitDirectory(root, file):
absolutePathOfFile = os.path.join(root, file)
filePathWithoutFilename = os.path.split(absolutePathOfFile)[0]
filenameWithExt = os.path.split(absolutePathOfFile)[1]
filenameWithoutExt = os.path.splitext(filenameWithExt)[0]
extension = os.path.splitext(filenameWithExt)[1]
return absolutePathOfFile, filePathWithoutFilename, filenameWithExt, filenameWithoutExt, extension
for root, subFolders, files in os.walk(directoryPath):
for f in files:
absolutePathOfFile, filePathWithoutFilename, filenameWithExt,
filenameWithoutExt, extension = splitDirectory(root, f)
if extension == ".evtx":
evtxFile(absolutePathOfFile, filenameWithExt, ".txt", _fromDate, _toDate)

Python SQLite3 - cursor.execute - no error

This is a piece of code which needs to perform the follow functionality:
Dump all table names in a database
From each table search for a column with either Latitude or Longitude in
Store these co-ords as a json file
The code was tested and working on a single database. However once it was put into another piece of code which calls it with different databases it now is not entering line 49. However there is no error either so I am struggling to see what the issue is as I have not changed anything.
Code snippet line 48 is the bottom line -
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print (cursor)
for tablerow in cursor.fetchall():
I am running this in the /tmp/ dir due to an earlier error with sqlite not working outside the temp.
Any questions please ask them.
Thanks!!
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sqlite3
import os
import sys
filename = sys.argv[1]
def validateFile(filename):
filename, fileExt = os.path.splitext(filename)
print ("[Jconsole] Python: Filename being tested - " + filename)
if fileExt == '.db':
databases(filename)
elif fileExt == '.json':
jsons(fileExt)
elif fileExt == '':
blank()
else:
print ('Unsupported format')
print (fileExt)
def validate(number):
try:
number = float(number)
if -90 <= number <= 180:
return True
else:
return False
except ValueError:
pass
def databases(filename):
dbName = sys.argv[2]
print (dbName)
idCounter = 0
mainList = []
lat = 0
lon = 0
with sqlite3.connect(filename) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print (cursor)
for tablerow in cursor.fetchall():
print ("YAY1")
table = tablerow[0]
cursor.execute('SELECT * FROM {t}'.format(t=table))
for row in cursor:
print(row)
print ("YAY")
tempList = []
for field in row.keys():
tempList.append(str(field))
tempList.append(str(row[field]))
for i in tempList:
if i in ('latitude', 'Latitude'):
index = tempList.index(i)
if validate(tempList[index + 1]):
idCounter += 1
tempList.append(idCounter)
(current_item, next_item) = \
(tempList[index], tempList[index + 1])
lat = next_item
if i in ('longitude', 'Longitude'):
index = tempList.index(i)
if validate(tempList[index + 1]):
(current_item, next_item) = \
(tempList[index], tempList[index + 1])
lon = next_item
result = '{ "id": ' + str(idCounter) \
+ ', "content": "' + dbName + '", "title": "' \
+ str(lat) + '", "className": "' + str(lon) \
+ '", "type": "box"},'
mainList.append(result)
file = open('appData.json', 'a')
for item in mainList:
file.write('%s\n' % item)
file.close()
# {
# ...."id": 1,
# ...."content": "<a class='thumbnail' href='./img/thumbs/thumb_IMG_20161102_151122.jpg'>IMG_20161102_151122.jpg</><span><img src='./img/thumbs/thumb_IMG_20161102_151122.jpg' border='0' /></span></a>",
# ...."title": "50.7700721944444",
# ...."className": "-0.8727045",
# ...."start": "2016-11-02 15:11:22",
# ...."type": "box"
# },
def jsons(filename):
print ('JSON')
def blank():
print ('blank')
validateFile(filename)

Fixed.
The issue was up here
filename, fileExt = os.path.splitext(filename)
The filename variable was being overwritten without the file extension so when SQLite searched it didn't find the file.
Strange no error appeared but it is fixed now by changing the filename var to filename1.

Python script that performs line matching over stale files generates inconsistent output

I created a python script to parse mail (exim) logfiles and execute pattern matching in order to get a top 100 list for most send domains on my smtp servers.
However, everytime I execute the script I get a different count.
These are stale logfiles, and I cannot find a functional flaw in my code.
Example output:
1:
70353 gmail.com
68337 hotmail.com
53657 yahoo.com
2:
70020 gmail.com
67741 hotmail.com
54397 yahoo.com
3:
70191 gmail.com
67917 hotmail.com
54438 yahoo.com
Code:
#!/usr/bin/env python
import os
import datetime
import re
from collections import defaultdict
class DomainCounter(object):
def __init__(self):
self.base_path = '/opt/mail_log'
self.tmp = []
self.date = datetime.date.today() - datetime.timedelta(days=14)
self.file_out = '/var/tmp/parsed_exim_files-' + str(self.date.strftime('%Y%m%d')) + '.decompressed'
def parse_log_files(self):
sub_dir = os.listdir(self.base_path)
for directory in sub_dir:
if re.search('smtp\d+', directory):
fileInput = self.base_path + '/' + directory + '/maillog-' + str(self.date.strftime('%Y%m%d')) + '.bz2'
if not os.path.isfile(self.file_out):
os.popen('touch ' + self.file_out)
proccessFiles = os.popen('/bin/bunzip2 -cd ' + fileInput + ' > ' + self.file_out)
accessFileHandle = open(self.file_out, 'r')
readFileHandle = accessFileHandle.readlines()
print "Proccessing %s." % fileInput
for line in readFileHandle:
if '<=' in line and ' for ' in line and '<>' not in line:
distinctLine = line.split(' for ')
recipientAddresses = distinctLine[1].strip()
recipientAddressList = recipientAddresses.strip().split(' ')
if len(recipientAddressList) > 1:
for emailaddress in recipientAddressList:
# Since syslog messages are transmitted over UDP some messages are dropped and needs to be filtered out.
if '#' in emailaddress:
(login, domein) = emailaddress.split("#")
self.tmp.append(domein)
continue
else:
try:
(login, domein) = recipientAddressList[0].split("#")
self.tmp.append(domein)
except Exception as e:
print e, '<<No valid email address found, skipping line>>'
accessFileHandle.close()
os.unlink(self.file_out)
return self.tmp
if __name__ == '__main__':
domainCounter = DomainCounter()
result = domainCounter.parse_log_files()
domainCounts = defaultdict(int)
top = 100
for domain in result:
domainCounts[domain] += 1
sortedDict = dict(sorted(domainCounts.items(), key=lambda x: x[1], reverse=True)[:int(top)])
for w in sorted(sortedDict, key=sortedDict.get, reverse=True):
print '%-3s %s' % (sortedDict[w], w)

proccessFiles = os.popen('/bin/bunzip2 -cd ' + fileInput + ' > ' + self.file_out)
This line is non-blocking. Therefore it will start the command, but the few following lines are already reading the file. This is basically a concurrency issue. Try to wait for the command to complete before reading the file.
Also see:
Python popen command. Wait until the command is finished since os.popen is deprecated since python-2.6 (depending on which version you are using).
Sidenote - The same happens to the line below. The file may, or may not, exist after executing the following line:
os.popen('touch ' + self.file_out)

Python - file management and processing multiple zip files

I have some zip files in a folder. I have a script to process them. The data that is to be written to a database is in a different file and its structure is as follows:
some_text;database;file_name
some_text2;database2;file_name2
....
What is the best way to process this file? Also, an error message should be reported if there is no matching zip file name in that file.
My current code:
filelist = glob.glob(os.path.join(rootdir, '*.zip'))
if filelist:
for file in filelist:
print "Working on file ", file
#get only file name without .zip for compare
aa = file.split(sl)
bb = aa[len(aa) -1]
cc = bb.split(".")
ime_sole = cc[0]
fle = codecs.open(rootdir + sl + 'portal_schools.txt',
'r',encoding="cp1250")
line = fle.readline()
# Read lines
for line in iter(fle):
#print line,
a,b,c = line.split(";")
if c == ime_sole:
print c
database = str(b)
#distdir = str(c)
else:
print "some text"
return
fle.close()
But this fails because it is being read line by line. If in the first line there is no match, the code stops. I need it to continue trough the file and then, after all is done, start with a new zip file.

I know my code is far from perfect. The problem was with else. I moved it to the end of the whole code. It was a novice mistake. I also inserted try-catch so if it fails on one zip file, the next one is still processed. Now, it looks something like this:
filelist = glob.glob(os.path.join(rootdir, '*.zip'))
if filelist:
for file in filelist:
try:
aa = file.split(sl)
#print "aa ",aa
bb = aa[len(aa) -1]
#print "bb ", bb
cc = bb.split(".")
#print "cc ", cc
ime_sole = cc[0]
#print "imesole ", ime_sole
fle = codecs.open(rootdir + sl + 'portal_schools.txt','r',encoding="cp1250")
#line = fle.readline()
data = []
for line in iter(fle):
line = line.replace("\r\n", "")
x = line.split(";")
data.append(x)
result = [element for element in data if element[2] == ime_sole]
fle.close()
#print result
if result:
database = result[0][1]
vnos_data = "Podatki za %s , se vpisujejo v bazo %s " % (ime_sole, database)
host ="####"
user="####"
password = "####"
iUrnik_tables = iUrnik_tables_fromzip.Tables(defdir,file,sl,host,database,user,password)
id_skripte =iUrnik_tables[0]
date_begin = iUrnik_tables[1]
date_end = iUrnik_tables[2]
iUrnik_all_fromzip.FileWork(defdir,file,sl,host,database,user,password)
iUrnik_itt_zip.Proces(defdir,file,sl,host,database,user,password,id_skripte,date_begin,date_end)
trenutek = datetime.datetime.now()
trenutek = trenutek.strftime("%Y%m%d%H%M")
newfilename = os.path.splitext(file)[0]
newfilename = newfilename +"_" + str(trenutek) + os.path.splitext(file)[1]
folder = defdir + sl + ime_sole + sl + "archive"
destination = folder + sl
novoimezipa= destination + newfilename.split(sl)[-1]
if not os.path.exists(folder):
os.makedirs(folder)
os.chdir(folder)
shutil.copy(file,destination)
old = destination + file.split(sl)[-1]
os.rename(old , novoimezipa )
os.remove(file)
else:
nothing :)
#return
except:
print sys.exc_info()
else:
vnos_nodata= u"V mapi %s ni podatkov za prenos" % (rootdir)
Logging(defdir, sl, vnos_nodata)
I know it is not perfect but it works :)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

separate line output by groups - python

Related

How to run an .sql file full of commands as a command line argument to a python program?

How to read .evtx file using python?

Python SQLite3 - cursor.execute - no error

Python script that performs line matching over stale files generates inconsistent output

Python - file management and processing multiple zip files

Categories

Resources