There is a process that generates log data larger than 10 MB. I have been instructed to split the data into chunks of at most 10 MB each and write them to text files, meaning that if the log size is 25 MB it should be divided into 3 parts - 10, 10 and 5 MB - and written to 3 text files.
Also, the second and third text file names should be like "file..._1", "file..._2". To produce the _1 and _2 suffixes I am using the code filename = "file" + "_" + np.arange(1, 10, 1) + ".txt", but when it creates a new file with the underscore it raises a UFuncTypeError.
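For context, here is a minimal reproduction of the error and the usual fix - a plain integer counter instead of np.arange (this is an illustrative sketch, not my actual logging code):

```python
import numpy as np

# "file" + "_" + np.arange(...) builds a str + ndarray expression;
# NumPy has no 'add' loop for str and int dtypes, so it raises UFuncTypeError
try:
    filename = "file" + "_" + np.arange(1, 10, 1) + ".txt"
except TypeError as exc:  # UFuncTypeError is a subclass of TypeError
    print("concatenation failed:", exc)

# a plain integer counter builds one file name at a time instead
counter = 1
filename = "file" + "_" + str(counter) + ".txt"
print(filename)
```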
My code is:
def writelog(self, filename, msgstr):
    # writing log to .txt file
    filename = "log-" + str(date.today()) + ".txt"
    current_date_and_time = str(datetime.now())
    logfile = open(filename, 'a')
    logfile.write(current_date_and_time + msgstr)
    logfile.close()
    # checking if the text file is more than 10mb, then create a new file
    filelocation = "...location.../log-2021-07-20.txt"
    filesize = os.stat(filelocation)
    sizeoflog = filesize.st_size / (1024 * 1024)
    print('Size of log in MB- ' + str(sizeoflog))
    if sizeoflog > 10:
        filename = "log-" + str(date.today()) + "_" + np.arange(1, 10, 1) + ".txt"
        logfile = open(filename, 'a')
        logfile.write(current_date_and_time + msgstr)
        logfile.close()
    return filename
msgstr is a dictionary that I pass in from main.py.
So, the summary is:
split the data into 10 MB chunks and write each chunk to a file
the first file name will be like log-today's date.txt, the second will be log-today's date_1.txt, and so on
each file's content should start with current_date_and_time and then the msgstr
How can I address these problems? I am a beginner in Python.
Here's my approach. I created 2 simple helper functions, one for the file size (with a try/except block) and another to find the last log file with a size under 10 MB.
Since they don't use the class itself, you should use the @staticmethod decorator. Note that you need to change the calls to both getsize() and find_current_log(), as I don't know your class name.
from datetime import datetime
import os

class ClassNameGoesHere:

    @staticmethod
    def getsize(filename):
        try:
            return os.stat(filename).st_size / 1048576
        except FileNotFoundError:
            return 0

    @staticmethod
    def find_current_log(filename):
        base_filename = os.path.basename(filename)
        if '_' in base_filename:
            counter = int(base_filename.split('_')[1].split('.')[0])
        else:
            counter = 0
        while ClassNameGoesHere.getsize(filename) >= 10:
            counter += 1
            if '_' in base_filename:
                base_filename = f"{base_filename.split('_')[0]}_{counter}.txt"
            else:
                base_filename = f"{base_filename.split('.')[0]}_{counter}.txt"
            filename = f'{os.path.dirname(filename)}{os.sep}{base_filename}'
        return filename

    def writelog(self, filename, msgstr):
        filename = ClassNameGoesHere.find_current_log(filename)
        with open(filename, 'a') as outfile:
            outfile.write(f'{datetime.now()} | {msgstr}\n')

somelogger = ClassNameGoesHere()
somelogger.writelog('path/to/file/log-2021-07-21.txt', 'this is a test message')
My code below creates exception log files at this location:
C:/Users/Desktop/SampleTestFiles/ProjectFiles/ExceptionLogFiles/
Initially the code keeps writing into ExceptionLog_1.txt whenever an exception occurs, and when the size of that file exceeds 1 MB it starts writing to ExceptionLog_2.txt until its size reaches 1 MB. So far it works, but only for these 2 file creations. When the size of the second file exceeds 1 MB it should log exceptions into a third file, ExceptionLog_3.txt, but it does not work; the code keeps writing into the second file.
How can I modify my code to make sure a new file is created whenever the size of the latest log file exceeds 1 MB?
def WriteExceptionToFile(self, traceback):
    count = 1
    fileDir = 'C:/Users/Desktop/SampleTestFiles/ProjectFiles/ExceptionLogFiles/'
    # check if the path exists, create directory if not.
    if not os.path.exists(fileDir):
        os.mkdir(fileDir)
    filename = "ExceptionLog_" + str(count) + ".txt"
    filepath = os.path.join(fileDir, filename)
    try:
        if os.path.getsize(filepath) < 1048576:  # if file size is less than 1 MB
            filename = "ExceptionLog_" + str(count) + ".txt"
        else:
            filename = "ExceptionLog_" + str(count + 1) + ".txt"
    except OSError:
        print("Path '%s' does not exist or is inaccessible" % filepath)
        filename = "ExceptionLog_1.txt"
    filepath = os.path.join(fileDir, filename)
    with open(filepath, 'a+') as f:
        traceback.print_exc(file=f)
You could also try an approach using rotating files from the logging module.
Example directly from the documentation (https://docs.python.org/3/howto/logging-cookbook.html):
import glob
import logging
import logging.handlers

LOG_FILENAME = 'logging_rotatingfile_example.out'

# Set up a specific logger with our desired output level
my_logger = logging.getLogger('MyLogger')
my_logger.setLevel(logging.DEBUG)

# Add the log message handler to the logger, HERE YOU CAN SPECIFY THE FILE SIZE
handler = logging.handlers.RotatingFileHandler(
    LOG_FILENAME, maxBytes=20, backupCount=5)
my_logger.addHandler(handler)

# Log some messages
for i in range(20):
    my_logger.debug('i = %d' % i)

# See what files are created
logfiles = glob.glob('%s*' % LOG_FILENAME)
for filename in logfiles:
    print(filename)
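Adapted to the 1 MB limit from the question, the setup might look like this sketch (the logger and file names here are illustrative, not from the question; old files get .1, .2, ... suffixes automatically):

```python
import logging
import logging.handlers

# one logger dedicated to exception output
logger = logging.getLogger('ExceptionLogger')
logger.setLevel(logging.ERROR)

# roll over to ExceptionLog.txt.1, ExceptionLog.txt.2, ...
# once the current file reaches 1 MB, keeping at most 10 backups
handler = logging.handlers.RotatingFileHandler(
    'ExceptionLog.txt', maxBytes=1048576, backupCount=10)
logger.addHandler(handler)

logger.error('an exception occurred')
```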
I would suggest you go with using a class; that way you won't have to worry about maintaining the correct count elsewhere.
Check out the solution below:
import os

class GenericExceptionWriter:
    def __init__(self):
        self.count = 1
        self.fileDir = 'C:/Users/Desktop/SampleTestFiles/ProjectFiles/ExceptionLogFiles/'
        os.makedirs(self.fileDir, exist_ok=True)
        self.currentFilePath = "".join(["ExceptionLog_", str(self.count), ".txt"])
        self.maxSize = 1048576

    def checkSize(self):
        # compare against the full path, not just the bare file name
        if os.path.getsize(os.path.join(self.fileDir, self.currentFilePath)) > self.maxSize:
            self.count += 1
            self.currentFilePath = "".join(["ExceptionLog_", str(self.count), ".txt"])

    def WriteExceptionToFile(self, traceback):
        try:
            self.checkSize()
        except OSError:
            print("Path '%s' does not exist or is inaccessible" % self.currentFilePath)
        filepath = os.path.join(self.fileDir, self.currentFilePath)
        with open(filepath, 'a+') as f:
            traceback.print_exc(file=f)
I have a script that reads files called source1.html, source2.html, source3.html, and so on, but when it can't find the next file (because it doesn't exist) it gives me an error. There can be any number of sourceX.html files, so I need something to stop the loop when the next sourceX.html file cannot be found.
Traceback (most recent call last):
  File "main.py", line 14, in
    file = open(filename, "r")
IOError: [Errno 2] No such file or directory: 'source4.html'
How can I stop the script looking for the next source file?
from bs4 import BeautifulSoup
import re
import os.path

n = 1
filename = "source" + str(n) + ".html"
savefile = open('OUTPUT.csv', 'w')
while os.path.isfile(filename):
    strjpgs = "Extracted Layers: \n \n"
    filename = "source" + str(n) + ".html"
    n = n + 1
    file = open(filename, "r")
    soup = BeautifulSoup(file, "html.parser")
    thedata = soup.find("div", class_="cplayer")
    strdata = str(thedata)
    DoRegEx = re.compile('/([^/]+)\.jpg')
    jpgs = DoRegEx.findall(strdata)
    strjpgs = strjpgs + "\n".join(jpgs) + "\n \n"
    savefile.write(filename + '\n')
    savefile.write(strjpgs)
    print(filename)
    print(strjpgs)
savefile.close()
print "done"
Use a try/except and break:

while os.path.isfile(filename):
    try:                         # try to do this
        # <your code>
    except FileNotFoundError:    # if this error occurs
        break                    # exit the loop
The reason your code doesn't currently work is that in your while loop you're checking whether the previous file exists, not the next one. Hence you could also do

while True:
    strjpgs = "Extracted Layers: \n \n"
    filename = "source" + str(n) + ".html"
    if not os.path.isfile(filename):
        break
    # <rest of your code>
You can try opening the file and breaking out of the while loop once you catch an IOError exception.

from bs4 import BeautifulSoup
import re
import os.path

n = 1
filename = "source" + str(n) + ".html"
savefile = open('OUTPUT.csv', 'w')
while os.path.isfile(filename):
    try:
        strjpgs = "Extracted Layers: \n \n"
        filename = "source" + str(n) + ".html"
        n = n + 1
        file = open(filename, "r")
    except IOError:
        print("file not found! breaking out of loop.")
        break
    soup = BeautifulSoup(file, "html.parser")
    thedata = soup.find("div", class_="cplayer")
    strdata = str(thedata)
    DoRegEx = re.compile('/([^/]+)\.jpg')
    jpgs = DoRegEx.findall(strdata)
    strjpgs = strjpgs + "\n".join(jpgs) + "\n \n"
    savefile.write(filename + '\n')
    savefile.write(strjpgs)
    print(filename)
    print(strjpgs)
savefile.close()
print "done"
I suggest you use both os.path.exists() (which returns True/False) and os.path.isfile().
Also, use the with statement to open files. It is the Pythonic way, and it is preferred among professional coders.
These are the contents of my current working directory.
H:\RishikeshAgrawani\Projects\Stk\ReadHtmlFiles>dir
Volume in drive H is New Volume
Volume Serial Number is C867-828E
Directory of H:\RishikeshAgrawani\Projects\Stk\ReadHtmlFiles
11/05/2018 16:12 <DIR> .
11/05/2018 16:12 <DIR> ..
11/05/2018 15:54 106 source1.html
11/05/2018 15:54 106 source2.html
11/05/2018 15:54 106 source3.html
11/05/2018 16:12 0 stopReadingIfNot.md
11/05/2018 16:11 521 stopReadingIfNot.py
5 File(s) 839 bytes
2 Dir(s) 196,260,925,440 bytes free
The Python code below shows how to read files source1.html, source2.html, source3.html and stop when there are no more files of the form sourceX.html (where X is 1, 2, 3, 4, ... etc.).
Sample code:
import os

n = 1
html_file_name = 'source%d.html'

# It is necessary to check that sourceX.html exists
# and is a file (not a directory) before performing
# any operation (read/write etc.) on it.
while os.path.isfile(html_file_name % (n)) and os.path.exists(html_file_name % (n)):
    print "Reading ", html_file_name % (n)
    # The best way (Pythonic way) to open a file;
    # you don't need to bother about closing it,
    # that is taken care of by the with statement
    with open(html_file_name % (n), "r") as file:
        # Make sure it works
        print html_file_name % (n), " exists\n"
    n += 1
Output:
H:\RishikeshAgrawani\Projects\Stk\ReadHtmlFiles>python stopReadingIfNot.py
Reading source1.html
source1.html exists
Reading source2.html
source2.html exists
Reading source3.html
source3.html exists
So, based on the above logic, you can modify your code and it will work.
Thanks.
This appears to be a sequence error. Let's look at a small fragment of your code, specifically lines dealing with filename:
filename = "source" + str(n) + ".html"
while os.path.isfile(filename):
    filename = "source" + str(n) + ".html"
    n = n + 1
    file = open(filename, "r")
You're generating the next filename before you open the file (or really, checking the old filename then opening a new one). It's a little hard to see because you're really updating n while filename holds the previous number, but if we look at them in sequence it pops out:
n = 1
filename = "source1.html"           # before loop
while os.path.isfile(filename):
    filename = "source1.html"       # first time inside loop
    n = 2
    open(filename)
while os.path.isfile(filename):     # second time in loop - still source1
    filename = "source2.html"
    n = 3
    open(filename)                  # We haven't checked if this file exists!
We can fix this a few ways. One is to move the entire updating, n before filename, to the end of the loop. Another is to let the loop mechanism update n, which is a sight easier (the real fix here is that we only use one filename value in each iteration of the loop):
import itertools

for n in itertools.count(1):
    filename = "source{}.html".format(n)
    if not os.path.isfile(filename):
        break
    file = open(filename, "r")
    #...
At the risk of looking rather obscure, we can also express the steps functionally (I'm using six here to avoid a difference between Python 2 and 3; Python 2's map wouldn't finish):
from six.moves import map
from itertools import count, takewhile

numbers = count(1)
filenames = map('source{}.html'.format, numbers)
existingfiles = takewhile(os.path.isfile, filenames)
for filename in existingfiles:
    file = open(filename, "r")
    #...
Other options include iterating over the numbers alone and using break when isfile returns False, or simply catching the exception when open fails (eliminating the need for isfile entirely).
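That last option - catching the exception when open fails - could look like this sketch (read_sources is a name I've made up; the per-file work is left as a callback):

```python
def read_sources(process):
    """Call process(filename, text) for source1.html, source2.html, ...
    until the next file in the sequence does not exist."""
    n = 1
    while True:
        filename = "source" + str(n) + ".html"
        try:
            f = open(filename, "r")
        except IOError:   # FileNotFoundError in Python 3
            break         # next file is missing: stop looking
        with f:
            process(filename, f.read())
        n += 1
    return n - 1  # number of files processed
```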
Hi, I have created a mini program to get the MD5 hash of all files in the directory the script is located in.
My problem is that when I generate the get_size() of the files I end up with only one line in my filelist.md5.txt, and get_size() seems to output the sum for the entire directory instead of each individual file's size.
How can I output individual file sizes in this script?
I get this output in the filelist.md5.txt file:
#
# GENERATE_FILELIST
# (filename) (filesize) (md5) (major_version) (minor_version)
#
Test_2.txt 190 dea9fe052f1abf71bac7421c732b0475 ---- ----
However I want to get this output:
#
# GENERATE_FILELIST
# (filename) (filesize) (md5) (major_version) (minor_version)
#
MD5.bat filesize b9a7c825517002e0da8e980c2c2c2cef ---- ----
MD5.py filesize b61124e8bef473d377f59aa0964174ce ---- ----
MD5test.bat filesize f29d68f9721c57d648164cae79dac71b ---- ----
MD5test.py filesize a7a3c45ebe1aca82f57591c7fccd6cfc ---- ----
MD5v1.bat filesize e5e7407117845a2413fe667fe7a2f681 ---- ----
MD5v1.py filesize 55ab90b5a623548825a0b40406fcdde2 ---- ----
MD5v2.bat filesize e9e31aaa62f6f37572cf89a03860cb96 ---- ----
MD5v3.bat filesize 559c0e9ed05fc9b4884c83bc3e04f8fd ---- ----
MD5v3.py filesize d20a8841f3c37d28fd3b74847731e212 ---- ----
Test_2.txt filesize dea9fe052f1abf71bac7421c732b0475 ---- ----
Code so far:
import glob
import hashlib
import sys
import os

filenames = glob.glob('*.*')

# truncate the file to zero length before opening
f1 = open(os.path.expanduser(sys.path[0]) + '\\filelist.md5.txt', 'w')

# 'a' will append to the file, rather than write over whatever else you put in it like 'w'
with open('filelist.md5.txt', 'a') as f:
    print >> f, ''
    print >> f, '#'
    print >> f, '# GENERATE_FILELIST'
    print >> f, '# (filename) (filesize) (md5) (major_version) (minor_version)'
    print >> f, '#'
    print >> f, ''
f.close()

# print to console
for filename in filenames:
    with open(filename, 'rb') as inputfile:
        data = inputfile.read()
        print '. -- ', filename, ' ---------- ', hashlib.md5(data).hexdigest()

# get the size of each file
def get_size(start_path='.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for fn in filenames:
            fp = os.path.join(dirpath, fn)
            total_size += os.path.getsize(fp)
    return total_size

# 'a' will append to the file, rather than write over whatever else you put in it like 'w'
with open('filelist.md5.txt', 'a') as f:
    print >> f, '{:44}'.format(filename), get_size(), ' ', hashlib.md5(data).hexdigest(), ' ', '----', ' ', '----'
f.close()
Your get_size() is written to return the size of the whole directory, which is not what you're looking for.
dir = r'specify\path\here'
with open('filelist.md5.txt', 'w') as fx:
    for f in os.listdir(dir):
        path = os.path.join(dir, f)
        if os.path.isfile(path):
            # specify anything else you want to write inside fx.write()
            fx.write(f + "\t\t" + str(os.path.getsize(path)) + "\n")

The above code writes the file name and size separated by tabs, one file per line.
You don't have to explicitly close the file when you're using with open('filelist.md5.txt', 'a') as f:
Try this (it handles large files and non-ASCII file names, avoids the glob module, and includes error handling):
import hashlib, os, sys

your_target_folder = "."  # put your folder or just this "."

def get_size(filename):
    st = os.stat(filename)
    return str(st.st_size)

def get_minor_version(filename):
    # Your Code ...
    return "minor_version"

def get_major_version(filename):
    # Your Code ...
    return "major_version"

def get_md5(fname):
    # hash in 1 MB chunks so large files don't have to fit in memory
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(2 ** 20), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

# this function works for non-ASCII file names (like Chinese formats)!!
def sys_out(out_data):
    try:
        print(out_data)
    except UnicodeEncodeError:
        if sys.version_info >= (3,):
            print(out_data.encode('utf8').decode(sys.stdout.encoding))
        else:
            print(out_data.encode('utf8'))

def make_beautiful_terminal_output(get_list):
    # pad every column to the width of the longest entry
    col_width = max(len(word) for word in get_list) + 3  # padding
    return "".join(word.ljust(col_width) for word in get_list)

def print_header():
    header_tag = "(filename) (filesize) (md5) (major_version) (minor_version)\n"
    with open("filelist.md5.txt", "a") as my_header:
        my_header.write(header_tag)
    print(header_tag)

print_header()
for dirpath, _, filenames in os.walk(your_target_folder):
    for items in filenames:
        file_full_path = os.path.abspath(os.path.join(dirpath, items))
        try:
            my_last_data = [items, get_size(file_full_path), get_md5(file_full_path), get_major_version(file_full_path), get_minor_version(file_full_path)]
            terminal_output = make_beautiful_terminal_output(my_last_data)
            sys_out(terminal_output)
            re_buffer = terminal_output + "\n"
            with open("filelist.md5.txt", "a", encoding='utf-8') as my_save_file:
                my_save_file.write(re_buffer)
        except OSError:
            sys_out("Error On " + str(file_full_path))
This code reads files from a directory, replaces the string "400432" with 6 random digits generated by Python, builds a script and executes it.
Each file should get its own unique random 6 digits with a corresponding script. File 1: 123456 and script 123456.sh, file 2: 775463 and script 775463.sh, etc.
The problem is that when multiple files are present, 123456 is written into all of the files and only 123456.sh is created. I am not sure how to loop this, and it's been 3 years since I've used Python.
Struggling to finish this up. Thanks for any help!
#!/usr/bin/python
import os
import stat
import fileinput
import glob
import sys
import re
import random
import subprocess
import string

##############################################################
# Generates random numbers and replaces in WSP files
##############################################################
def random_digits(y):
    return ''.join(random.choice(string.digits) for x in range(y))

rand = str(random_digits(6))
_replace_re = re.compile("400432")
for dirpath, dirnames, filenames in os.walk("/home/mark/WSP_IN/"):
    for file in filenames:
        file = os.path.join(dirpath, file)
        head, tail = os.path.split(file)
        tempfile = file + ".temp"
        with open(tempfile, "w") as target:
            with open(file) as source:
                for line in source:
                    line = _replace_re.sub(rand, line)
                    target.write(line)
        os.rename(tempfile, file)

##############################################################
# Creates script that builds ship to based on random numbers
##############################################################
s = open('/usr/local/bin/wsp_scripts/' + rand + '.sh', 'wb+')

##############################################################
# Define responses as string arguments rather than text
##############################################################
data = '/usr/local/bin/wsp_scripts/'
ext = '.sh'
space = ' '
script = 'exec ${B}/pro5 -c${CF}'
pathx = ' -m2048 -q ${P} - '
swfile = str(tail + space)
swship = str(rand)
sscript = str(script)
spathx = str(pathx)
shell = str(data + rand + ext)

##############################################################
# Write static text and arguments into file
##############################################################
s.write('#!/bin/bash' + '\n')
s.write('#termsoa=/soatermsystem' + '\n')
s.write('PATH=$PATH:/usr/basic:/usr/basic/util' + '\n')
s.write('B=/usr/local/basis/pro5' + '\n')
s.write('P=/usr5/prog/utils/SO/EDIshipbuild' + '\n')
s.write('CF=/usr/local/basis/pro5/config.bbx' + '\n')
s.write('TERMCAP=${B}/termcap' + '\n')
s.write('export PATH TERM TERMCAP' + '\n')
s.write('umask 0' + '\n')
s.write('cd /usr/basic' + '\n')
s.write(sscript + spathx + swfile + rand)
st = os.stat(shell)
os.chmod(shell, st.st_mode | stat.S_IEXEC)
s.close()

##############################################################
# Import script just created and execute
##############################################################
cmd = '/usr/local/bin/wsp_scripts/' + rand + '.sh'
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
out, err = p.communicate()
result = out.split('\n')
for lin in result:
    if not lin.startswith('#'):
        print(lin)
You define rand and create your .sh file outside of the for loop. If you want to redefine rand for each file, move its definition into the inner for loop. The same goes for the .sh file creation.
Which I think would look like this:
def random_digits(y):
    return ''.join(random.choice(string.digits) for x in range(y))

_replace_re = re.compile("400432")
for dirpath, dirnames, filenames in os.walk("/home/mark/WSP_IN/"):
    for file in filenames:
        rand = str(random_digits(6))
        file = os.path.join(dirpath, file)
        head, tail = os.path.split(file)
        tempfile = file + ".temp"
        with open(tempfile, "w") as target:
            with open(file) as source:
                for line in source:
                    line = _replace_re.sub(rand, line)
                    target.write(line)
        os.rename(tempfile, file)
        ##############################################################
        # Creates script that builds ship to based on random numbers
        ##############################################################
        s = open('/usr/local/bin/wsp_scripts/' + rand + '.sh', 'wb+')
I don't know if using 6 digits is a requirement or not. If it isn't, you could use a timestamp:

import time

def random_digits(y):
    return int(time.time())

It will return a 10-digit timestamp, which is unique as long as two files are not processed within the same second.
Let me know if it helps!
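Alternatively, if exactly 6 digits are required, remembering the numbers already handed out guarantees uniqueness within a run; a sketch (unique_digits and the used set are illustrative names, not from the question):

```python
import random
import string

used = set()

def unique_digits(n=6):
    """Return a random n-digit string not handed out before in this run."""
    while True:
        cand = ''.join(random.choice(string.digits) for _ in range(n))
        if cand not in used:
            used.add(cand)   # remember it so later calls can't repeat it
            return cand
```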
I have 100 text files and I want to save the results into 100 text files too. Right now my code can read all the files, but it saves only one file, which contains the latest result. Here is the code:
def nama():
    path = "C:/Amar/code/"
    infilename = os.listdir(path)
    print len(infilename)
    for filename in infilename:
        print("jumpa dah" + path + "\\" + filename)
        f = open(path + "\\" + filename, "r")
        data = f.read()
        f.close()
        lines = data.split('\n')
        outfilename = path + "result.txt"
        print outfilename
        f = open(outfilename, "a")
Append a string that will act as a unique identifier for each output file. You can use the input filename for this:
outfilename = path + filename + "_result.txt"
# e.g reports_result.txt
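Putting it together, the loop might look like this sketch (convert_all is an illustrative name; it assumes the <input name>_result.txt naming above and copies each file's lines through unchanged, since the question doesn't show the per-file processing):

```python
import os

def convert_all(path):
    """Write one <name>_result.txt next to each input file in path."""
    for filename in os.listdir(path):
        infile = os.path.join(path, filename)
        if not os.path.isfile(infile) or filename.endswith("_result.txt"):
            continue  # skip directories and previously written results
        with open(infile, "r") as f:
            lines = f.read().split('\n')
        # unique output name per input file, so nothing is overwritten
        outfilename = os.path.join(path, filename + "_result.txt")
        with open(outfilename, "w") as f:
            f.write('\n'.join(lines))
```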