Create new files in sequence when size exceeds 1 MB - python

The code below creates exception log files at this location:
C:/Users/Desktop/SampleTestFiles/ProjectFiles/ExceptionLogFiles/
Initially the code writes into ExceptionLog_1.txt whenever an exception occurs, and when that file exceeds 1 MB it starts writing to ExceptionLog_2.txt until that file in turn reaches 1 MB. So far it works correctly only for these first two files. When the size of the second file exceeds 1 MB it should start logging exceptions into a third file, ExceptionLog_3.txt, but it does not: the code keeps writing into the second file.
How can I modify my code to make sure a new file is created whenever the size of the latest log file exceeds 1 MB?
def WriteExceptionToFile(self, traceback):
    count = 1
    fileDir = 'C:/Users/Desktop/SampleTestFiles/ProjectFiles/ExceptionLogFiles/'
    # Check if the path exists, create the directory if not.
    if not os.path.exists(fileDir):
        os.mkdir(fileDir)
    filename = "ExceptionLog_" + str(count) + ".txt"
    filepath = os.path.join(fileDir, filename)
    try:
        if os.path.getsize(filepath) < 1048576:  # if file size is less than 1 MB
            filename = "ExceptionLog_" + str(count) + ".txt"
        else:
            filename = "ExceptionLog_" + str(count + 1) + ".txt"
    except OSError:
        print("Path '%s' does not exist or is inaccessible" % filepath)
        filename = "ExceptionLog_1.txt"
    filepath = os.path.join(fileDir, filename)
    with open(filepath, 'a+') as f:
        traceback.print_exc(file=f)

You could also try an approach using rotating files from the logging module.
This example is taken directly from the documentation (https://docs.python.org/3/howto/logging-cookbook.html):
import glob
import logging
import logging.handlers

LOG_FILENAME = 'logging_rotatingfile_example.out'

# Set up a specific logger with our desired output level
my_logger = logging.getLogger('MyLogger')
my_logger.setLevel(logging.DEBUG)

# Add the log message handler to the logger; HERE YOU CAN SPECIFY THE FILE SIZE
handler = logging.handlers.RotatingFileHandler(
    LOG_FILENAME, maxBytes=20, backupCount=5)
my_logger.addHandler(handler)

# Log some messages
for i in range(20):
    my_logger.debug('i = %d' % i)

# See what files are created
logfiles = glob.glob('%s*' % LOG_FILENAME)
for filename in logfiles:
    print(filename)
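For the 1 MB requirement in the question you would set maxBytes accordingly. A minimal sketch, where the logger name and the log file path are placeholders, not part of the original answer:

import logging
import logging.handlers

# Hypothetical path; adjust to your ExceptionLogFiles directory.
LOG_FILENAME = 'C:/Users/Desktop/SampleTestFiles/ProjectFiles/ExceptionLogFiles/ExceptionLog.txt'

exc_logger = logging.getLogger('ExceptionLogger')
exc_logger.setLevel(logging.ERROR)

# Roll over to a new file once the current one reaches 1 MB,
# keeping up to 10 old files.
handler = logging.handlers.RotatingFileHandler(
    LOG_FILENAME, maxBytes=1048576, backupCount=10)
exc_logger.addHandler(handler)

# Inside an except block, this writes the current traceback to the active file:
# exc_logger.exception("Unhandled exception")

Note that RotatingFileHandler names the rotated files ExceptionLog.txt.1, ExceptionLog.txt.2, and so on, rather than ExceptionLog_2.txt, ExceptionLog_3.txt.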

I would suggest going with a class; that way you won't have to worry about maintaining the correct count elsewhere.
Check out the solution below:
import os

class GenericExceptionWriter:
    def __init__(self):
        self.count = 1
        self.fileDir = 'C:/Users/Desktop/SampleTestFiles/ProjectFiles/ExceptionLogFiles/'
        os.makedirs(self.fileDir, exist_ok=True)
        self.currentFilePath = "".join(["ExceptionLog_", str(self.count), ".txt"])
        self.maxSize = 1048576

    def checkSize(self):
        if os.path.getsize(os.path.join(self.fileDir, self.currentFilePath)) > self.maxSize:
            self.count += 1
            self.currentFilePath = "".join(["ExceptionLog_", str(self.count), ".txt"])

    def WriteExceptionToFile(self, traceback):
        try:
            self.checkSize()
        except OSError:
            print("Path '%s' does not exist or is inaccessible" % self.currentFilePath)
        filepath = os.path.join(self.fileDir, self.currentFilePath)
        with open(filepath, 'a+') as f:
            traceback.print_exc(file=f)
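A possible usage sketch (the traceback import and the single shared writer instance are assumptions added here, not part of the original answer):

import traceback

# Keep one writer alive for the lifetime of the process so the count persists.
exception_writer = GenericExceptionWriter()

try:
    1 / 0
except ZeroDivisionError:
    # Pass the traceback module; the writer calls traceback.print_exc(file=f).
    exception_writer.WriteExceptionToFile(traceback)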

Related

increment csv file number after loop completion

I am executing a loop and saving data to a CSV file. To store the data I create a path and directory and append to the CSV file. As it is, the code executes fine and generates one CSV file upon completion. I would like the CSV file number to increment each time I run the code so I don't have to delete or overwrite the previous one, i.e. file_0 for run 1, file_1 for run 2, and so on. I inserted `# fileName = "{}/{}_{}.csv".format(file_path[0], self.file_name, file_path[1])` but this saves each point in a separate file. Any suggestions welcome. Thanks.
import csv
from fileinput import filename
from locale import currency
import time
from numpy import append, outer
from datetime import datetime
import os
from random import random

# Create folder, timestamped
class CreateFile():
    def __init__(self, procedure):
        self.procedure = procedure      # Get the procedure name.
        self.createfile()               # Call the create file function.

    def createfile(self):
        date = datetime.now().date()
        PARENT_DIR = "C:\test/".format(date)        # Get the path.
        DIR = '{}'.format(self.procedure)           # Get the procedure name.
        self.PATH = os.path.join(PARENT_DIR, DIR)   # Form a full path.
        try:
            if not os.path.exists(self.PATH):       # If the given path does not exist.
                os.makedirs(self.PATH)              # Make a directory.
        except OSError:                             # OSError occurred, don't make the directory.
            print("Creation of the directory [%s] failed." % DIR)
        else:                                       # Successfully created the directory, print a message.
            print("Successfully created the directory %s " % DIR)

    def get_file(self):
        file_list = os.listdir(self.PATH)   # List the directory contents.
        file_count = len(file_list)         # Check the number of file(s) under the given path.
        return [self.PATH, file_count]      # Return the full path and the file count under this folder.

# Initialization and setpoints list
startpoint = 300
setpoint = 310
step = 10
temp_list = []
for x in range(startpoint, setpoint+1, step):
    temp_list.append(x)
print(temp_list)

class save_data1():
    def __init__(self, file_name):
        self.file_name = file_name
        file_path_count = CreateFile(file_name).get_file()
        self.fileName = "{}/{}.csv".format(file_path_count[0], file_name)

    def record_csv(self, fileName, now, ep1):
        with open(fileName, 'a', newline='') as csvfile:
            header = ["Timestamp", 'Temp', "ep1"]
            writer = csv.DictWriter(csvfile, fieldnames=header)
            if csvfile.tell() == 0:
                writer.writeheader()
            writer.writerow(
                {
                    "Timestamp": now,
                    'Temp': temp,
                    "ep1": ep1
                }
            )

    def test(self):
        file_path = CreateFile(self.file_name).get_file()
        # fileName = "{}/{}_{}.csv".format(file_path[0], self.file_name, file_path[1])
        fileName = "{}/{}.csv".format(file_path[0], self.file_name)
        now = datetime.now()
        ep1 = random()  # Here just a random number instead of the instrument reading
        self.record_csv(fileName, now, ep1)

# Set setpoint in Temp list
for temp in temp_list:
    # Print a header
    print('')
    hdr = '______ T ______ __________ H __________\t______ Ext Param ______'
    print(hdr)
    time.sleep(0.5)
    print('setpoint:', temp)
    if temp == 300:
        save_data1('meas1').test()
    else:
        print('Waiting ')
        save_data1('meas1').test()
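One way to get the incrementing behaviour the commented-out fileName line was aiming for is to compute the run index once per run (not once per data point) and reuse it for every row of that run. A minimal sketch of a hypothetical helper, assuming the run index can be derived from how many matching CSV files already exist in the folder:

import glob
import os

def next_csv_name(directory, base_name):
    # Count existing CSVs for this measurement and use that as the run index:
    # run 1 -> base_name_0.csv, run 2 -> base_name_1.csv, and so on.
    existing = glob.glob(os.path.join(directory, "{}_*.csv".format(base_name)))
    return os.path.join(directory, "{}_{}.csv".format(base_name, len(existing)))

# Compute the file name once per run, then append every data point to it:
# run_file = next_csv_name(r"C:\test\meas1", "meas1")   # hypothetical path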

Split log data by MB size in Python

There is a process that generates log data of more than 10 MB. I have been instructed to split the data into chunks of at most 10 MB and write them to text files; for example, if the log size is 25 MB it should be divided into three parts of 10, 10 and 5 MB and written to three text files.
Also, the second and third text file names should be like "file..._1", "file..._2". To build the _1 and _2 names I am using the code filename = "log" + "_" + np.arange(1, 10, 1) + ".txt", but when it tries to create a new file with the underscore it raises a UFuncTypeError.
My code is:
def writelog(self, filename, msgstr):
    # Writing log to .txt file
    filename = "log-" + str(date.today()) + ".txt"
    current_date_and_time = str(datetime.now())
    logfile = open(filename, 'a')
    logfile.write(current_date_and_time + msgstr)
    logfile.close()
    # Checking if the text file is more than 10 MB; if so, create a new file
    filelocation = "...location.../log-2021-07-20.txt"
    filesize = os.stat(filelocation)
    sizeoflog = filesize.st_size / (1024 * 1024)
    print('Size of log in MB- ' + str(sizeoflog))
    if sizeoflog > 10:
        filename = "log-" + str(date.today()) + "_" + np.arange(1, 10, 1) + ".txt"
        logfile = open(filename, 'a')
        logfile.write(current_date_and_time + msgstr)
        logfile.close()
    return filename
msgstr is a dictionary that I pass in from main.py.
So, the summary is:
split the data into 10 MB chunks each and write each chunk to its own file
the first file name should be like log-<today's date>.txt, the second log-<today's date>_1.txt, and so on
each file's content should start with current_date_and_time and then the msgstr.
How can I address these problems? I am a beginner in Python.
Here's my approach. I created two simple helper functions, one for the file size (with a try/except block) and another to find the last log file with a size under 10 MB.
Since they don't care about the class itself, you should use the @staticmethod decorator. Note that you need to change the class name in the calls to both getsize() and find_current_log(), as I don't know your class name.
from datetime import datetime
import os

class ClassNameGoesHere:

    @staticmethod
    def getsize(filename):
        try:
            return os.stat(filename).st_size / 1048576
        except FileNotFoundError:
            return 0

    @staticmethod
    def find_current_log(filename):
        base_filename = os.path.basename(filename)
        if '_' in base_filename:
            counter = int(base_filename.split('_')[1].split('.')[0])
        else:
            counter = 0
        while ClassNameGoesHere.getsize(filename) >= 10:
            counter += 1
            if '_' in base_filename:
                base_filename = f"{base_filename.split('_')[0]}_{counter}.txt"
            else:
                base_filename = f"{base_filename.split('.')[0]}_{counter}.txt"
            filename = f'{os.path.dirname(filename)}{os.sep}{base_filename}'
        return filename

    def writelog(self, filename, msgstr):
        filename = ClassNameGoesHere.find_current_log(filename)
        with open(filename, 'a') as outfile:
            outfile.write(f'{datetime.now()} | {msgstr}\n')

somelogger = ClassNameGoesHere()
somelogger.writelog('path/to/file/log-2021-07-21.txt', 'this is a test message')
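If you also need to split an already-written oversized log into 10 MB pieces after the fact (the first point in the question's summary), here is a minimal sketch of one way to do it. It reads and rewrites in binary chunks, naming the pieces with the _1, _2 convention from the question; note it splits at byte boundaries, not line boundaries, and the function name is just an illustration:

import os

def split_log(path, chunk_bytes=10 * 1048576):
    # Split an oversized log into pieces of at most chunk_bytes each,
    # named <base>_1.txt, <base>_2.txt, ... next to the original file.
    base, ext = os.path.splitext(path)
    part = 0
    with open(path, 'rb') as src:
        while True:
            chunk = src.read(chunk_bytes)
            if not chunk:
                break
            part += 1
            with open("{}_{}{}".format(base, part, ext), 'wb') as dst:
                dst.write(chunk)
    return part  # number of pieces written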

Python - How to stop the loop

I have code that reads files called source1.html, source2.html, source3.html, and so on, but when it can't find the next file (because it doesn't exist) it gives me an error. There can be any number of sourceX.html files, so I need a way to stop the loop when the next sourceX.html file cannot be found.
Traceback (most recent call last):
  File "main.py", line 14, in
    file = open(filename, "r")
IOError: [Errno 2] No such file or directory: 'source4.html'
How can I stop the script from looking for the next source file?
from bs4 import BeautifulSoup
import re
import os.path

n = 1
filename = "source" + str(n) + ".html"
savefile = open('OUTPUT.csv', 'w')
while os.path.isfile(filename):
    strjpgs = "Extracted Layers: \n \n"
    filename = "source" + str(n) + ".html"
    n = n + 1
    file = open(filename, "r")
    soup = BeautifulSoup(file, "html.parser")
    thedata = soup.find("div", class_="cplayer")
    strdata = str(thedata)
    DoRegEx = re.compile('/([^/]+)\.jpg')
    jpgs = DoRegEx.findall(strdata)
    strjpgs = strjpgs + "\n".join(jpgs) + "\n \n"
    savefile.write(filename + '\n')
    savefile.write(strjpgs)
    print(filename)
    print(strjpgs)
savefile.close()
print "done"
Use a try/except and break:
while os.path.isfile(filename):
    try:  # try to do this
        # <your code>
    except FileNotFoundError:  # if this error occurs
        break  # exit the loop
The reason your code doesn't currently work is that you're checking whether the previous file exists in your while loop, not the next one. Hence you could also do:
while True:
    strjpgs = "Extracted Layers: \n \n"
    filename = "source" + str(n) + ".html"
    if not os.path.isfile(filename):
        break
    # <rest of your code>
You can try opening the file and breaking out of the while loop once you catch an IOError exception.
from bs4 import BeautifulSoup
import re
import os.path

n = 1
filename = "source" + str(n) + ".html"
savefile = open('OUTPUT.csv', 'w')
while os.path.isfile(filename):
    try:
        strjpgs = "Extracted Layers: \n \n"
        filename = "source" + str(n) + ".html"
        n = n + 1
        file = open(filename, "r")
    except IOError:
        print("file not found! breaking out of loop.")
        break
    soup = BeautifulSoup(file, "html.parser")
    thedata = soup.find("div", class_="cplayer")
    strdata = str(thedata)
    DoRegEx = re.compile('/([^/]+)\.jpg')
    jpgs = DoRegEx.findall(strdata)
    strjpgs = strjpgs + "\n".join(jpgs) + "\n \n"
    savefile.write(filename + '\n')
    savefile.write(strjpgs)
    print(filename)
    print(strjpgs)
savefile.close()
print "done"
I suggest you use both os.path.exists() (which returns True/False) and os.path.isfile().
Use the with statement to open files. It is the Pythonic way to open files and the form preferred by professional coders.
These are the contents of my current working directory.
H:\RishikeshAgrawani\Projects\Stk\ReadHtmlFiles>dir
Volume in drive H is New Volume
Volume Serial Number is C867-828E
Directory of H:\RishikeshAgrawani\Projects\Stk\ReadHtmlFiles
11/05/2018 16:12 <DIR> .
11/05/2018 16:12 <DIR> ..
11/05/2018 15:54 106 source1.html
11/05/2018 15:54 106 source2.html
11/05/2018 15:54 106 source3.html
11/05/2018 16:12 0 stopReadingIfNot.md
11/05/2018 16:11 521 stopReadingIfNot.py
5 File(s) 839 bytes
2 Dir(s) 196,260,925,440 bytes free
The Python code below shows how to read files source1.html, source2.html, source3.html and stop when there are no more files of the form sourceX.html (where X is 1, 2, 3, 4, etc.).
Sample code:
import os

n = 1
html_file_name = 'source%d.html'

# It is necessary to check whether sourceX.html is a file or a directory,
# and whether it exists or not.
# If it exists, then perform the operation (read/write etc.) on the file.
while os.path.isfile(html_file_name % (n)) and os.path.exists(html_file_name % (n)):
    print "Reading ", html_file_name % (n)
    # The best way (Pythonic way) to open a file.
    # You don't need to bother about closing the file;
    # it will be taken care of by the with statement.
    with open(html_file_name % (n), "r") as file:
        # Make sure it works
        print html_file_name % (n), " exists\n"
    n += 1
Output:
H:\RishikeshAgrawani\Projects\Stk\ReadHtmlFiles>python stopReadingIfNot.py
Reading source1.html
source1.html exists
Reading source2.html
source2.html exists
Reading source3.html
source3.html exists
So, based on the above logic, you can modify your code and it will work.
Thanks.
This appears to be a sequence error. Let's look at a small fragment of your code, specifically lines dealing with filename:
filename = "source" + str(n) + ".html"
while os.path.isfile(filename):
filename = "source" + str(n) + ".html"
n = n + 1
file = open(filename, "r")
You're generating the next filename before you open the file (or really, checking the old filename then opening a new one). It's a little hard to see because you're really updating n while filename holds the previous number, but if we look at them in sequence it pops out:
n = 1
filename = "source1.html"          # before loop
while os.path.isfile(filename):
    filename = "source1.html"      # first time inside loop
    n = 2
    open(filename)
while os.path.isfile(filename):    # second time in loop - still source1
    filename = "source2.html"
    n = 3
    open(filename)                 # We haven't checked if this file exists!
We can fix this a few ways. One is to move the entire update (n before filename) to the end of the loop. Another is to let the loop mechanism update n, which is a sight easier (the real fix here is that we only use one filename value in each iteration of the loop):
import itertools

for n in itertools.count(1):
    filename = "source{}.html".format(n)
    if not os.path.isfile(filename):
        break
    file = open(filename, "r")
    #...
At the risk of looking rather obscure, we can also express the steps functionally (I'm using six here to avoid a difference between Python 2 and 3; Python 2's map wouldn't finish):
from six.moves import map
from itertools import count, takewhile

numbers = count(1)
filenames = map('source{}.html'.format, numbers)
existingfiles = takewhile(os.path.isfile, filenames)
for filename in existingfiles:
    file = open(filename, "r")
    #...
Other options include iterating over the numbers alone and using break when isfile returns False, or simply catching the exception when open fails (eliminating the need for isfile entirely).
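A minimal sketch of that last option, catching the error from open() itself (IOError here to match the question's Python 2 traceback; in Python 3 you could catch the more specific FileNotFoundError):

n = 1
while True:
    filename = "source" + str(n) + ".html"
    try:
        file = open(filename, "r")
    except IOError:   # FileNotFoundError in Python 3
        break         # no more sourceN.html files; stop the loop
    # ... process the file as before ...
    file.close()
    n += 1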

Class for file creation and directory validation

After reading some texts regarding file creation in Python, I decided to create this class, which creates a new file in a directory and creates a backup in another directory if the file already exists (and is older than x hours).
The main reason I opened this question is to find out whether this is a correct way to write a class using try/except, because I'm getting a little confused about when to prefer try/except over if/else.
Below is the working example:
import os
import datetime

class CreateXML():
    def __init__(self, path, filename):
        self.path = path
        self.bkp_path = "%s\\backup" % path
        self.filename = filename
        self.bkp_file = "%s.previous" % filename
        self.create_check = datetime.datetime.now()-datetime.timedelta(hours=-8)

    @staticmethod
    def create_dir(path):
        try:
            os.makedirs(path)
            return True
        except:
            return False

    @staticmethod
    def file_check(file):
        try:
            open(file)
            return True
        except:
            return False

    def create_file(self, target_dir, target_file):
        try:
            target = "%s\\%s" % (target_dir, target_file)
            open(target, 'w')
        except:
            return False

    def start_creation(self):
        try:
            # Check if file exists
            if self.file_check("%s\\%s" % (self.path, self.filename)):
                self.create_dir(self.bkp_path)
                creation = os.path.getmtime("%s\\%s" % (self.path, self.filename))
                fcdata = datetime.datetime.fromtimestamp(creation)
                # File exists and is older than 8 hours
                if fcdata < self.create_check:
                    bkp_file_path = "%s\\%s " % (self.bkp_path, self.bkp_file)
                    new_file_path = "%s\\%s " % (self.path, self.filename)
                    # If backup file exists, erase current backup file,
                    # move existing file to backup and create new file.
                    if self.file_check("%s\\%s" % (self.bkp_path, self.bkp_file)):
                        os.remove(bkp_file_path)
                        os.rename(new_file_path, bkp_file_path)
                        self.create_file(self.bkp_path, self.bkp_file)
                    # No backup file, create new one.
                    else:
                        self.create_file(self.bkp_path, self.bkp_file)
            else:
                # Fresh creation
                self.create_dir(self.path)
                self.create_file(self.path, self.filename)
        except OSError, e:
            print e

if __name__ == '__main__':
    path = 'c:\\tempdata'
    filename = 'somefile.txt'
    cx = CreateXML(path, filename)
    cx.start_creation()
So, basically, the real question here is:
- With the example above, is the usage of try/except correct?
- Is it correct to perform the validations using try/except to check whether a file or directory already exists, instead of using a simplified version like this one?
import os

# Simple method of doing it
path = 'c:\\tempdata'
filename = 'somefile.txt'
bkp_path = 'c:\\tempdata\\backup'
bkp_file = 'somefile.txt.bkp'
new_file_path = "%s\\%s" % (path, filename)
bkp_file_path = "%s\\%s" % (bkp_path, bkp_file)

if not os.path.exists(path):
    print "create path"
    os.makedirs(bkp_path)
if not os.path.isfile(new_file_path):
    print "create new file"
    open(new_file_path, 'w')
else:
    print "file exists, moving to backup folder"
    # Check if backup file exists
    if not os.path.isfile(bkp_file_path):
        print "New backup file created"
        open(bkp_file_path, 'w')
    else:
        print "backup exists, removing backup, backup the current, and creating newfile"
        os.remove(bkp_file_path)
        os.rename(new_file_path, bkp_file_path)
        open(bkp_file_path, 'w')
- If the usage of try/except is correct, is it recommended to write a big class to create a file when it's possible to write a short version of it?
Please do not close this thread, since I'm really confused about the "most correct Pythonic way to do it".
Thanks in advance.
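For comparison, here is a minimal try/except (EAFP) sketch of the same "back up then create" decision that catches a specific exception instead of a bare except. This is an illustration, not a drop-in replacement for the class above: it assumes Python 3 (FileNotFoundError, os.replace) and reuses the same placeholder paths.

import os

path = 'c:\\tempdata'
new_file_path = os.path.join(path, 'somefile.txt')
bkp_file_path = os.path.join(path, 'backup', 'somefile.txt.previous')

os.makedirs(os.path.dirname(bkp_file_path), exist_ok=True)
try:
    # EAFP: attempt the move and handle the "nothing to back up" case.
    os.replace(new_file_path, bkp_file_path)   # overwrites any old backup
except FileNotFoundError:
    pass                                       # no existing file yet
with open(new_file_path, 'w') as f:
    pass                                       # create the fresh file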

Monitoring zip file extraction (display percentage) in python

I have a hundred zip files in a directory, so I wrote a Python script to unzip all of them, but I need to display a percentage status for the file inside each huge zip file (each zip file actually contains only one file).
I found some examples here, but in all of them each zip file had several files inside it, so the percentage was based on the number of files inside the zip file and not on the progress of a single file (my case).
So I wrote the code below, but for each zip file it only ever shows "100% completed", whereas for each file it should show something like this:
10% Completed
12% Completed
16% Completed
...
100% Completed
I would really appreciate any suggestions.
# -*- coding: utf-8 -*-
import glob, zipfile, sys, threading
from os.path import getsize

class Extract(threading.Thread):
    def __init__(self, z, fname, base, lock):
        threading.Thread.__init__(self)
        self.z = z
        self.fname = fname
        self.base = base
        self.lock = lock

    def run(self):
        self.lock.acquire()
        self.z.extract(self.fname, self.base)
        self.lock.release()

if len(sys.argv) < 2:
    sys.exit("""
    Syntax: python %s [folder name]
    """ % sys.argv[0])

base = sys.argv[1]
if base[len(base)-1:] != '/':
    base += '/'

for fs in glob.glob(base + '*.zip'):
    if 'BR' not in fs.split('.'):
        f = open(fs, 'rb')
        z = zipfile.ZipFile(f)
        for fname in z.namelist():
            size = [s.file_size for s in z.infolist() if s.filename == fname][0]
            lock = threading.Lock()
            background = Extract(z, fname, base, lock)
            background.start()
            print fname + ' => ' + str(size)
            while True:
                lock.acquire()
                filesize = getsize(base + fname)
                lock.release()
                print "%s %% completed\r" % str(filesize * 100.0 / size)
                if filesize == size:
                    break
The extract method writes directly to disk. That's okay, but you want to hook into that. Rather than using extract, you might want to use open. With open, you'll get a file-like object out of it, and you can copy from that file to the file on disk, writing out progress as you go.
Here's an example snippet of code you can use to modify yours. It uses the ZipInfo object to find the uncompressed size of the member file. As you read it out you can report how close to finished you are.
Note that this is written for Python 3.2 and above; the with statement support was added then. With previous versions you would need to open the zip file and close it manually.
from zipfile import ZipFile

chunk_size = 1024 * 1024
zip_path = "test_zip.zip"

with ZipFile(zip_path, 'r') as infile:
    for member_info in infile.infolist():
        filename = member_info.filename
        file_size = member_info.file_size
        with open("{}_{}".format(zip_path, filename), 'wb') as outfile:
            member_fd = infile.open(filename)
            total_bytes = 0
            while 1:
                x = member_fd.read(chunk_size)
                if not x:
                    break
                total_bytes += outfile.write(x)
                print("{0}% completed".format(100 * total_bytes / file_size))
