Read csv from previous 7 days in Python - python

I have written a function that reads files matching a specific wildcard and path (like this one, for example) from the previous seven days.
# Appends to self.files_list the names found in par_path that match
# par_file_wildcard + '<YYYY_MM_DD-HH>' + '*.csv', and returns self.files_list.
def i_get_last_week_file(self, par_path, par_file_wildcard, par_datetime):
# Window start: one week before par_datetime.
proc_datetime = par_datetime - timedelta(weeks=1)
logger.info('Processing time: %s', str(proc_datetime))
# file_list = []
# NOTE(review): no statement in this loop assigns proc_datetime again, so
# sdate is only ever built from the timestamp one week back.
while proc_datetime <= par_datetime:
# The stamp includes the hour (%H), so a name must carry that same hour to match.
sdate = proc_datetime.strftime('%Y_%m_%d-%H')
for p_file in os.listdir(par_path):
if fnmatch.fnmatch(p_file, par_file_wildcard + sdate + '*.csv'):
self.files_list.append(p_file)
# NOTE(review): the original nesting of this break is not recoverable from
# the flattened listing — confirm which loop it leaves.
break
return self.files_list
# Reads the CSV files selected for the window ending at par_date/par_time and
# returns them concatenated into one DataFrame.
def get_csv_from_local_weekly(self, par_path, par_wildcard_name, par_date=None, par_time=None):
# Missing date/time arguments fall back to the values stored on the instance.
if par_date is None:
par_date = self.file_date
if par_time is None:
par_time = self.file_time
end_datetime = datetime.combine(par_date, par_time)
# NOTE(review): this calls i_get_last_day_file, while the helper shown with
# this method is named i_get_last_week_file — confirm which one is intended.
a = self.i_get_last_day_file(par_path, par_wildcard_name, end_datetime)
for i in a:
try:
# par_path and the file name are joined by plain concatenation, so par_path
# presumably already ends with a path separator — verify against callers.
df = pd.read_csv(par_path+''+i, index_col=None, header=0, delimiter=';')
self.pandas_list.append(df)
# NOTE(review): the handler's body is not recoverable from the flattened
# listing — confirm what happens when a file fails to load.
except Exception:
frame = pd.concat(self.pandas_list)
# Both accumulators are reset so the next call starts empty.
self.files_list = []
self.pandas_list = []
return frame
The problem is that it reads only the files that are exactly seven days old, but I need to collect all files from the previous 7 days up to today.

Well you need to create a range of 7 days and this is one way of doing it:
import datetime
import os
import re

# Reference point for the window; swap in your own datetime if needed.
td = datetime.datetime.today()
# One 'YYYY_MM_DD-HH' stamp per day: today plus the six days before it.
# NOTE: the stamp keeps td's hour, so only names carrying that same hour match.
lastWeek = [(td - datetime.timedelta(i)).strftime('%Y_%m_%d-%H') for i in range(7)]
# Compiled once, outside the loop; raw string so the \d escapes reach the regex engine.
datePattern = re.compile(r'\d{4}_\d{2}_\d{2}-\d{2}')
for p_file in os.listdir(par_path):
    # Search the loop variable itself (it was misspelled as `pfile`).
    dateSearch = datePattern.search(p_file)
    if dateSearch:
        dateFound = dateSearch.group(0)
        if dateFound in lastWeek:
            ...
Of course, you can change the td into your date variable, I just used it for testing.
Also, your proc_datetime doesn't seem to increment inside the while loop, and if you introduce the increment - you won't need the break:
# Step through the window one day at a time, collecting every file whose name
# matches that day's stamp.
while proc_datetime <= par_datetime:
    sdate = proc_datetime.strftime('%Y_%m_%d-%H')
    for p_file in os.listdir(par_path):
        if fnmatch.fnmatch(p_file, par_file_wildcard + sdate + '*.csv'):
            self.files_list.append(p_file)
    # Advance to the next day so the while condition eventually fails.
    # `timedelta` is used unqualified, as in the method this loop belongs to.
    proc_datetime += timedelta(days=1)

Related

List inside dictionary

I am unable to get the last 3 digits of the id number.
from datetime import datetime
# Parses a 'YYYY-MM-DD' string and returns its day-of-year number (tm_yday).
def days_to_birthday(date):
datetime_object = datetime.strptime(date, "%Y-%m-%d")
# The parameter name is reused for the parsed date object.
date = datetime_object.date()
num_days = date.timetuple().tm_yday
return num_days
# Builds an ID per record from the year plus a three-digit day-of-year code
# (offset by 500 when the gender column is "F"), then prints name and ID.
# NOTE(review): no close() call for this file handle appears in the listing.
fo = open("Data.txt", 'r') # File containg data
# Each line is split on whitespace; columns 0, 1, 2 are read as name, date, gender.
content = [i.rsplit() for i in fo.readlines()]
names = [content[i][0] for i in range(len(content))]
dates = [content[i][1] for i in range(len(content))]
gender = [content[i][2] for i in range(len(content))]
id_numbers = []
# NOTE(review): keyed by date, so records sharing a date collapse into one
# entry; id_numbers can then be shorter than names in the final loop.
mydict = dict(zip(dates, gender))
for i in mydict:
x = days_to_birthday(i)
if mydict.get(i) == "F":x += 500
x = str(x)
# Left-pad so the day code is always at least three characters.
if len(x) < 3:x = x.zfill(3)
i = i.split('-')
# i[0] is the year part of the date string.
out = i[0] + x
id_numbers.append(out)
for i in range(len(names)):
print(f"{names[i]} {id_numbers[i]}" )
Running your code would raise SyntaxError: 'return' outside function.
Because Python is a whitespace sensitive language, return num_days must be further indented so it applies within the days_to_birthday function.

I need to compare dates and return how many days are left until expiry or how many days ago they expired

I have never used Python before, coming from a Javascript background.
I need to loop through the rows in the CSV and compare each date with the date in the today variable, then report whether each item is expired or valid, and also how many days are left until expiry or how many days ago it expired.
Any pointers?
from datetime import datetime
today = datetime(2020, 5, 2)
with open('./guarantees_end.csv', 'r') as f:
for line in f.readlines():
My csv data is compiled like below:
2020-06-23,Flat C
2020-07-31,2 Teal Close
2020-08-14,67 Pinkers Mead
2020-08-31,3 Acacia Road
2020-04-13,1 Manor Place
You can read today's date using:
import datetime
today = datetime.datetime.now().date()
My temp.csv contains:
2020-12-11, ABCD
2020-12-23, EFGH
2020-12-01, IJKL
2020-11-11, MNOP
To read from CSV you can use:
# The with-block closes temp.csv once the rows have been read, even if a row
# raises part-way through.
with open('temp.csv') as f:
    csv_reader = csv.reader(f, delimiter=',')
    for row in csv_reader:
        # Column 0 holds the date string.
        product_date_str = str(row[0])
Now you can convert the date string (read from csv) to datetime.date object using:
date_format = '%Y-%m-%d'
product_date = datetime.datetime.strptime(product_date_str, date_format).date()
Now you can get the date difference using:
diff = (today - product_date).days
And then you can use this diff to print or display whatever you want. For example:
if diff > 0:
print('{} has already expired since {} days.'.format(row[1], abs(diff)))
elif diff < 0:
print('{} will expire in next {} days.'.format(row[1], abs(diff)))
else:
print('{} will expire today.'.format(row[1]))
So the complete code snippet would look like:
import csv
import datetime

# Report, for every row of temp.csv (date,name), whether the item has expired,
# expires today, or is still valid, together with the number of days involved.
today = datetime.datetime.now().date()
# The format never changes, so it is defined once rather than per row.
date_format = '%Y-%m-%d'
with open('temp.csv') as f:
    # skipinitialspace drops the blank after each comma ("2020-12-11, ABCD"),
    # so the name is printed without a leading space.
    csv_reader = csv.reader(f, delimiter=',', skipinitialspace=True)
    for row in csv_reader:
        if not row:
            # Blank lines come back as empty lists; there is nothing to report.
            continue
        product_date = datetime.datetime.strptime(row[0], date_format).date()
        # Positive: days since expiry. Negative: days still remaining.
        diff = (today - product_date).days
        if diff > 0:
            print('{} has already expired since {} days.'.format(row[1], abs(diff)))
        elif diff < 0:
            print('{} will expire in next {} days.'.format(row[1], abs(diff)))
        else:
            print('{} will expire today.'.format(row[1]))

Filename convention for TimedRotatingFileHandler log? (python)

Python 2.7:
Every time a log experiences a rollover event (logs with a RotatingFileHandler) a 'backup' log is generated.
For instance :
logFile = 'general.log'
file_handler = logging.handlers.TimedRotatingFileHandler(logFile,when="midnight")
Results in midnight roll over and on rollover event the following file is created:
general.log.2015-01-21
Does this module offer any flexibility how these filenames are structured?
ie use a different convention ... 20150121_general.log
Short answer is no: according to the TimedRotatingFileHandler documentation, there is no way to do it.
The suffix changes based on the when parameter, as you can see in the code: https://hg.python.org/cpython/file/2.7/Lib/logging/handlers.py#l187
From the same source code you can see that overriding suffix is simple, but you must override extMatch too:
class MyTimedRotatingFileHandler(TimedRotatingFileHandler):
    """TimedRotatingFileHandler whose backup suffix is a compact YYYYMMDD date."""

    def __init__(self, *args, **kwargs):
        super(MyTimedRotatingFileHandler, self).__init__(*args, **kwargs)
        # extMatch has to recognise whatever suffix produces, so the two
        # are overridden together.
        compact_date = r"^\d{4}\d{2}\d{2}$"
        self.extMatch = re.compile(compact_date)
        self.suffix = "%Y%m%d"
Unfortunately, replacing the dot separator and swapping the suffix and basename is not so simple: you must rewrite the doRollover() and getFilesToDelete() methods.
A hack could look something like this (untested)... I hope it works, but I cannot give any warranty :)
class MyTimedRotatingFileHandler(TimedRotatingFileHandler):
    """TimedRotatingFileHandler that names backups ``<YYYYMMDD>_<basename>.log``.

    The stock handler appends the date as a suffix
    (``general.log.2015-01-21``); this variant puts a compact date in front of
    the base name instead. Both the rename in doRollover() and the clean-up in
    getFilesToDelete() use the same ``<date>_<basename>.log`` convention.
    """

    def __init__(self, *args, **kwargs):
        super(MyTimedRotatingFileHandler, self).__init__(*args, **kwargs)
        # Must be a compiled pattern (getFilesToDelete calls .match on it) and
        # must agree with the "%Y%m%d" prefix written by doRollover.
        self.extMatch = re.compile(r"^\d{4}\d{2}\d{2}$")

    def getFilesToDelete(self):
        """Return the rotated files that exceed backupCount, oldest first.

        Only files named ``<YYYYMMDD>_<basename>.log`` in the log directory
        are considered.
        """
        dirName, baseName = os.path.split(self.baseFilename)
        ends = "_" + baseName + ".log"
        elen = len(ends)
        result = []
        for fileName in os.listdir(dirName):
            if fileName[-elen:] == ends:
                # The date is what precedes the "_<basename>.log" tail.
                date = fileName[:-elen]
                if self.extMatch.match(date):
                    result.append(os.path.join(dirName, fileName))
        # The date prefix sorts chronologically, so the oldest files come first.
        result.sort()
        if len(result) < self.backupCount:
            result = []
        else:
            result = result[:len(result) - self.backupCount]
        return result

    def doRollover(self):
        """Rotate the current log to ``<YYYYMMDD>_<basename>.log``.

        Adapted from TimedRotatingFileHandler.doRollover; only the
        construction of the destination file name differs.
        """
        if self.stream:
            self.stream.close()
            self.stream = None
        # get the time that this sequence started at and make it a TimeTuple
        currentTime = int(time.time())
        dstNow = time.localtime(currentTime)[-1]
        t = self.rolloverAt - self.interval
        if self.utc:
            timeTuple = time.gmtime(t)
        else:
            timeTuple = time.localtime(t)
            dstThen = timeTuple[-1]
            if dstNow != dstThen:
                if dstNow:
                    addend = 3600
                else:
                    addend = -3600
                timeTuple = time.localtime(t + addend)
        # baseFilename is an absolute path, so the date prefix goes on the
        # file name only, never in front of the directory part.
        dirName, baseName = os.path.split(self.baseFilename)
        dfn = os.path.join(
            dirName, time.strftime("%Y%m%d", timeTuple) + "_" + baseName + ".log")
        if os.path.exists(dfn):
            os.remove(dfn)
        # Issue 18940: A file may not have been created if delay is True.
        if os.path.exists(self.baseFilename):
            os.rename(self.baseFilename, dfn)
        if self.backupCount > 0:
            for s in self.getFilesToDelete():
                os.remove(s)
        if not self.delay:
            self.stream = self._open()
        newRolloverAt = self.computeRollover(currentTime)
        while newRolloverAt <= currentTime:
            newRolloverAt = newRolloverAt + self.interval
        # If DST changes and midnight or weekly rollover, adjust for this.
        if (self.when == 'MIDNIGHT' or self.when.startswith('W')) and not self.utc:
            dstAtRollover = time.localtime(newRolloverAt)[-1]
            if dstNow != dstAtRollover:
                if not dstNow:  # DST kicks in before next rollover, so we need to deduct an hour
                    addend = -3600
                else:  # DST bows out before next rollover, so we need to add an hour
                    addend = 3600
                newRolloverAt += addend
        self.rolloverAt = newRolloverAt

Get files created before x days, y hours and z minutes in python?

Hi I have the below program which should give me the list of files created before a given amount of time from the current time
# Walks `path` recursively and returns (file_list, count), where file_list
# holds the paths that pass the creation-time test below.
def filesOlderThan(path,day,hour,mins):
file_list = []
for root, dirs, files in os.walk(path):
for a in files:
# Path is built with a hard-coded backslash separator.
path1 = root + "\\"+ a
# Cutoff: the current time minus the requested days/hours/minutes.
sub_time = datetime.datetime.today() - datetime.timedelta(days=day,hours=hour,minutes=mins)
# NOTE(review): utctimetuple() is handed to time.mktime(), which is documented
# as taking a local-time tuple — confirm this is intended.
r_time = time.mktime(sub_time.utctimetuple())
c_time = os.path.getctime(path1)
# Keeps the files whose ctime is later than the cutoff.
if(r_time < c_time):
file_list.append(path1)
count = len(file_list)
return file_list,count
# Example call: a 10-minute window under C:\Arena\sample.
files,count = filesOlderThan("C:\\Arena\\sample",0,0,10)
print count
for a in files:
print a
When I run the program
C:\Arena>python getFilesOlderThan.py
0
The program works for hour values, but does not work with the following input case
I set the minutes to 10
Create a file inside C:\Arena\Sample
Run the program, the program does not give me the file.
r_time = time.mktime(sub_time.utctimetuple())
You're comparing the file's timestamp in local time with the current time in UTC.
You should use timetuple() instead of utctimetuple().
You can make some minor improvements to your function, such as doing calculations using datetimes instead of timestamps, using datetime.now instead of today, and using the cross-platform os.path.join instead of hard-coded Windows separators. It also seems unnecessary to calculate the len of the list inside the function - you can do it outside, if it's needed.
def filesOlderThan(path, day, hour, mins):
    """Return the files under *path* created within the given time window.

    The window ends now and reaches back *day* days, *hour* hours and *mins*
    minutes. Despite the function's name, a file is returned when its ctime is
    LATER than that cutoff, i.e. it was created inside the window.

    Args:
        path: Directory to walk recursively.
        day: Days to look back.
        hour: Hours to look back.
        mins: Minutes to look back.

    Returns:
        List of full paths, in os.walk order.
    """
    # The cutoff is the same for every file, so it is computed once up front.
    cutoff = datetime.datetime.now() - datetime.timedelta(
        days=day, hours=hour, minutes=mins)
    file_list = []
    for root, dirs, files in os.walk(path):
        for a in files:
            path1 = os.path.join(root, a)
            # fromtimestamp() yields naive local time, like datetime.now().
            c_time = datetime.datetime.fromtimestamp(os.path.getctime(path1))
            if cutoff < c_time:
                file_list.append(path1)
    return file_list
Finally, if you ever plan on sending these timestamps to another system, remember to convert them to utc (both the file's and the current). You'll probably want to use pytz for that.
I created this function, but you can modify it:
import datetime
import os
import time


def diff(file):
    """Return a coarse, human-readable age of *file* based on its mtime.

    The result uses the largest fitting unit: months (30-day blocks) when the
    file is more than 30 days old, otherwise days, hours, minutes or seconds.

    Args:
        file: Path of the file to inspect.

    Returns:
        A string such as "3 months", "5 days", "3 hours", "10 minutes" or
        "42 seconds".
    """
    today = datetime.datetime.today()
    # Truncated to whole seconds, which is the precision the previous
    # ctime()/strptime() round trip produced.
    file_d = datetime.datetime.fromtimestamp(int(os.path.getmtime(file)))
    age = today - file_d
    # minutes[0] = whole minutes elapsed, minutes[1] = leftover seconds.
    minutes = divmod(age.total_seconds(), 60)
    if age.days > 30:
        return f"{round(age.days / 30)} months"
    elif age.days != 0:
        return f"{age.days} days"
    elif minutes[0] > 59:
        return f"{round(minutes[0] / 60)} hours"
    elif minutes[0] != 0:
        return f"{round(minutes[0])} minutes"
    else:
        return f"{round(minutes[1])} seconds"
>>>diff("file.txt")

Cycling through a list of months with years to check if file exists

I am trying to cycle through a list of months with years starting from the year of 2012 and month of November, all the way up to the current year and month.
# Starting point of the search; both values are held as strings.
start_year = "2012"
start_month = "November"
month = start_month
year = start_year
# some sort of for loop or cycle here
# Opening '<month> <year>.csv' doubles as the existence check: an IOError is
# taken to mean the file is not there.
try:
with open('{} {}.csv'.format(month, year)):
CSVFile = True
if CSVFile:
print "do something cool here"
except IOError:
CSVFile = False
print ""
Any advice or constructive comments on how to achieve this would be greatly appreciated.
Thanks SMNALLY
I would probably do this as follows:
import datetime
import os

# First day of the current month: the last month worth checking.
current_month = datetime.date.today().replace(day=1)
# this could be more concise if `start_month` were the month number rather than month name
start_label = '%s %s' % (start_month, start_year)
possible_month = datetime.datetime.strptime(start_label, '%B %Y').date()
while not possible_month > current_month:
    csv_filename = '%s.csv' % possible_month.strftime('%B %Y')
    if os.path.exists(csv_filename):
        CSVFile = True
        # do something cool here, and maybe break the loop if you like
    # 31 days past the 1st always lands in the following month; snapping back
    # to day 1 gives the first of that month.
    overshoot = possible_month + datetime.timedelta(days=31)
    possible_month = overshoot.replace(day=1)
Let me know if you need me to expand on how that works.
import calendar
import datetime as dt

# Check for a '<Month name> <year>.csv' file for every month from the start
# month up to and including the current month.
startYear = 2012
startMonth = 11

now = dt.datetime.now()
year, month = startYear, startMonth
# Tuple comparison orders by year first, then month.
while (year, month) <= (now.year, now.month):
    # Files are named with the full month name, e.g. "November 2012.csv".
    filename = '{} {}.csv'.format(calendar.month_name[month], year)
    try:
        with open(filename):
            CSVFile = True
            if CSVFile:
                print("do something cool here")
    except IOError:
        CSVFile = False
        print("")
    # Advance one month, rolling over into January of the next year.
    month += 1
    if month > 12:
        month = 1
        year += 1

Categories