Datetime format detection Python3 - python

I want to use python3 to do the date format detection for example
I have file1 = "test_20180101234523.txt"
and the output should be the detected format string %Y%m%d%H%M%S and the datetime in the expected format 2018-01-01,23:45:23
Here's what I did so far,
import re

# Pull the first run of 14 digits (YYYYmmddHHMMSS) out of the file name.
file1 = "test_20180101234523.txt"
pattern = r'[0-9]{14}'
regex = re.compile(pattern)
matches = regex.findall(file1)
matchesStr = matches[0]

# Slice the stamp into its numeric components.
matchesYear = int(matchesStr[0:4])
matchesMonth = int(matchesStr[4:6])
matchesdate = int(matchesStr[6:8])
matchesH = int(matchesStr[8:10])
matchesM = int(matchesStr[10:12])
matchesS = int(matchesStr[12:14])


def checkdate():
    """Print the strftime directive for each component of the parsed stamp.

    Reads the module-level ``matches*`` values.  For every component that is
    in range the matching directive is printed (``%m`` is the month, ``%M``
    the minute); otherwise a "not format" message is printed instead.
    """
    if matchesYear > 1900:
        print("%Y")
    else:
        print("Year is not format")
    if 1 <= matchesMonth <= 12:
        print("%m")
    else:
        print("Month is not format")
    if 1 <= matchesdate <= 31:
        print("%d")
    else:
        print("Date is not format")
    # Hours run 00-23 and minutes/seconds 00-59, so zero is a valid value.
    if 0 <= matchesH <= 23:
        print("%H")
    else:
        print("Hour is not a format")
    if 0 <= matchesM <= 59:
        print("%M")
    else:
        print("Min is not a format")
    if 0 <= matchesS <= 59:
        print("%S")
    else:
        print("Sec is not a format")
I use regex to find out the group of integer and substring those to be each variable that I need. And use if-else condition to check each of those.
If you guys have any other idea, could you share, please?

Use datetime.strptime (assuming the regex output is always 14 digits and always follows the same format):
import datetime
# Parse the 14-digit stamp with explicit directives (%m = month, %M = minute).
date = datetime.datetime.strptime('20180101234523', '%Y%m%d%H%M%S')
# Re-format for display; in an interactive session this expression echoes the string.
date.strftime('%Y-%m-%d,%H:%M:%S')
'2018-01-01,23:45:23'

If the number in your input is always 14 digits long, you can combine datetime.strptime with a regex, as in this code, to get your desired output:
import re
from datetime import datetime
def get_integers(file_name, prefix='test_'):
    """Return the run of digits that directly follows *prefix* in *file_name*.

    The prefix is matched literally (regex metacharacters in it are escaped).
    Returns an empty string when the prefix followed by digits is not found.
    """
    match = re.search(re.escape(prefix) + r'(\d+)', file_name)
    return match.group(1) if match else ''
def get_datetime_object(date_string):
    """Return *date_string* reformatted as ``YYYY-MM-DD,HH:MM:SS``.

    *date_string* must be a ``%Y%m%d%H%M%S`` stamp.  Despite the function's
    name, the result is a formatted string, not a datetime object; ``None``
    is returned when the stamp cannot be parsed.
    """
    try:
        date_object = datetime.strptime(date_string, '%Y%m%d%H%M%S')
        return date_object.strftime('%Y-%m-%d,%H:%M:%S')
    except ValueError:
        return None
# Example run: pull the digit stamp out of the file name, then reformat it.
file1 = 'test_20180101234523.txt'
integers = get_integers(file1)
# date is the formatted string, or None when the stamp could not be parsed.
date = get_datetime_object(integers)
print(date)
Output:
2018-01-01,23:45:23
PS: Note that if the integers in the input aren't 14 digits long, you should adapt the get_integers function so that it returns a string containing 14 digits.

Related

How to replace the day in a date with another date?

I'm trying to replace the day of my date inside an if statement, but the year in my output comes out wrong:
I get 05/15/5 instead of 05/15/2020. Code is below:
# Capture the current moment and keep only its calendar date.
today_date = datetime.datetime.now()
date = today_date.date()
# formatted_date is a str from here on, not a date object.
formatted_date = datetime.date.strftime(date, "%m/%d/%Y")
mmonth = date.month
myear = date.year
mdate = date.day
# NOTE: the .replace calls below run on the str, i.e. str.replace(old, new, count):
# the year text is swapped for the month text, which is how "05/15/5" is produced.
if mdate < 7:
m0weekend = formatted_date.replace(str(myear),str(mmonth),1)
else:
m0weekend = formatted_date.replace(str(myear),str(mmonth),15)
it's easier to replace the day before converting to a string:
# Called on the date object (not on the formatted string); only the day changes.
date = date.replace(day=1)
or, in your case:
# Snap to the 1st while the day of the month is below 7, otherwise to the 15th.
if mdate < 7:
    m0weekend = date.replace(day=1)
else:
    m0weekend = date.replace(day=15)
formatted_date is actually a string.
You are using the str.replace() method not the datetime.date.replace() method.
import datetime
# Start from today's calendar date; it stays a date object until the very end.
today_date = datetime.datetime.now()
pre_formatted_date = today_date.date()
mmonth = pre_formatted_date.month
myear = pre_formatted_date.year
mdate = pre_formatted_date.day
# date.replace returns a new date with only the day swapped out.
if mdate < 7:
    pre_formatted_date = pre_formatted_date.replace(day=1)
else:
    pre_formatted_date = pre_formatted_date.replace(day=15)
print(pre_formatted_date)
# Convert to text only after the day has been adjusted.
formatted_date = pre_formatted_date.strftime("%m/%d/%Y")
print(formatted_date)
Which has the following output:
2020-05-15
05/15/2020
You might get today datetime.date directly from datetime rather than creating datetime.datetime and converting to date. After you have today you might create needed datetime.date and turn it into str, i.e.:
import datetime
# Take today's date directly, then pin the day: the 1st while the day of the
# month is below 7, the 15th afterwards.
today = datetime.date.today()
if today.day < 7:
    date = today.replace(day=1)
else:
    date = today.replace(day=15)
formatted_date = date.strftime("%m/%d/%Y")
print(formatted_date) # 05/15/2020

Getting dates in python between a past datestamp and the present

Language Python
I am wondering if anyone can help me print out some dates.
I am trying to create a loop in which I pass in a date, say 01/1/2017, and the loop then outputs the first and last day of every month between that date and the present day.
Example
01/01/2017
31/01/2017
01/02/2017
28/02/2017
etc
Any help will be appreciated
Hope this will help,
Code:
from datetime import date
from dateutil.relativedelta import relativedelta
from calendar import monthrange
# d1 is the fixed start date of the range; d2 is today, the end of the range.
d1 = date(2018, 2, 26)
d2 = date.today()
def print_month_day_range(date):
    """Print the first and last day of *date*'s month as DD/MM/YYYY.

    :param date: any ``datetime.date`` inside the month of interest
    """
    first_day = date.replace(day=1)
    # monthrange returns (weekday of the 1st, number of days in the month).
    last_day = date.replace(day=monthrange(date.year, date.month)[1])
    print(first_day.strftime("%d/%m/%Y"))
    print(last_day.strftime("%d/%m/%Y"))
# Walk month by month from d1's month up to and including the current month.
# Comparing first-of-month dates keeps the loop from running one month past
# today when d1's day of the month is smaller than today's.
d1 = d1.replace(day=1)
while d1 <= d2:
    print_month_day_range(d1)
    d1 = d1 + relativedelta(months=1)
Output:
01/02/2018
28/02/2018
01/03/2018
31/03/2018
...
01/07/2018
31/07/2018
you can use calendar module. Below is the code:
import datetime
from calendar import monthrange
strt_date = '01/1/2017'
# Parse the start date once and read "today" once, so year and month agree.
start = datetime.datetime.strptime(strt_date, '%m/%d/%Y')
iter_year = start.year
iter_month = start.month
today = datetime.datetime.today()
cur_year = today.year
cur_month = today.month
# Visit every (year, month) from the start month through the current month.
while cur_year > iter_year or (cur_year == iter_year and iter_month <= cur_month):
    # monthrange returns (weekday of the 1st, number of days in the month).
    number_of_days_in_month = monthrange(iter_year, iter_month)[1]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('/'.join([str(iter_month), '01', str(iter_year)]))
    print('/'.join([str(iter_month), str(number_of_days_in_month), str(iter_year)]))
    if iter_month == 12:
        iter_year += 1
        iter_month = 1
    else:
        iter_month += 1
this could work, as long as the first date you give is always the first of the month
from datetime import datetime
from datetime import timedelta
date_string = '01/01/2017'
date = datetime.strptime(date_string, '%d/%m/%Y').date()
today = datetime.now().date()
months = range(1, 13)
years = range(date.year, today.year + 1)
for y in years:
    for m in months:
        # Same day-of-month as the start date, moved to month m of year y.
        new_date = date.replace(month=m, year=y)
        # The day before it, i.e. the last day of the previous month when the
        # start date is the 1st.
        last_day = new_date - timedelta(days=1)
        if date < new_date <= today:
            print(last_day.strftime('%d/%m/%Y'))
            print(new_date.strftime('%d/%m/%Y'))
        elif date <= new_date <= today:
            # Only the start date itself reaches this branch.
            print(new_date.strftime('%d/%m/%Y'))
This code would print the first and last days of all months in a year.
Maybe add some logic to iterate through the years
import datetime
def first_day_of_month(any_day):
    """Return *any_day* moved to day 1 of its own month."""
    return any_day.replace(day=1)
def last_day_of_month(any_day):
    """Return the last day of *any_day*'s month."""
    # Day 28 exists in every month, and 28 + 4 days always lands in the next one.
    next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month gives the last day of the
    # original month.
    return next_month - datetime.timedelta(days=next_month.day)
# Print the first and last day of every month of 2017.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for month in range(1, 13):
    print(first_day_of_month(datetime.date(2017, month, 1)))
    print(last_day_of_month(datetime.date(2017, month, 1)))

Match and change multiple date formats into one with Python Regex

I need to implement a function in Python which is able to retrieve multiple date formats from input string, change them into one specific format and return the date only:
Format Example Input String
MMDDYYYY foo.bar.02242015.txt
MMDDYY foo.bar.022415.txt
MONCCYY foo.bar.FEB2015.txt
YYYY-MM-DD foo_bar_2015-02-01_2015-02-28.txt
YYYYMMDD foo_bar_20150224.txt
MM_YY foo_bar_02_15.txt
YYYYMMDD foo_bar_20150224.txt
Output: just a fixed 8 digits date format (no foo, bar or txt):
YYYYMMDD (e.g. 20120524)
Example:
Input Output
foo.bar.02242015.txt -> 20150224
Some requirements:
if date is missing, add the last day of the month:
foo_02_15.txt -> 20150228
if year is 2 digits, change it to 4:
foo_02_24_16.txt -> 20160224
valid year is current or previous year, for now: 2016 or 2015
if month is not number, e.g. FEB, change it to 2 digit number:
foo.FEB2015.txt -> 20150228
Format 'YYYY-MM-DD' always contains two dates, fetch the second one:
foo_2015-02-01_2015-02-28.txt -> 20150228
Anyone know how to do it with Regex in Python? Or what is the best practice to do it?
Try this:
import re
import time
import datetime
import calendar
# Group 1: the whole date-like run that follows a '.' or '_' and ends just
# before a '.'.  Group 2: its last (up to) 10 characters, i.e. the final date
# when two dates are chained together.
# A plain r'' literal is used because the ur'' prefix is a syntax error on Python 3.
p = re.compile(r'(?<=\.|_)([A-Z\d+_-]*?([A-Z\d+_-]{0,10}))(?=\.)')
test_str = u"Format Example Input String \n\nMMDDYYYY foo.bar.02242015.txt\nMMDDYY foo.bar.022415.txt\nMONCCYY foo.bar.FEB2015.txt\nYYYY-MM-DD foo_bar_2015-02-01_2015-02-28.txt\nYYYYMMDD foo_bar_20150224.txt\nMM_YY foo_bar_02_15.txt\nYYYYMMDD foo_bar_20150224.txt"
def changedate(date):
    """Normalise a date token to ``YYYYMMDD``.

    Recognised layouts are MMDDYYYY, MMDDYY, MONYYYY (e.g. ``FEB2015``),
    MM_YY and YYYY-MM-DD.  Layouts without a day resolve to the last day of
    their month.  If a token parses under more than one layout, the layout
    listed last wins.

    :param date: the date token taken from the file name
    :return: the ``YYYYMMDD`` string, or *date* unchanged when no layout matches
    """
    # (strptime format, True when the layout carries no day-of-month)
    layouts = (
        ('%m%d%Y', False),
        ('%m%d%y', False),
        ('%b%Y', True),
        ('%m_%y', True),
        ('%Y-%m-%d', False),
    )
    result = None
    for fmt, month_only in layouts:
        try:
            t = time.strptime(date, fmt)
        except (ValueError, TypeError):
            # Not this layout (or not a string at all): try the next one.
            continue
        if month_only:
            # monthrange returns (weekday of the 1st, number of days in the month).
            day = calendar.monthrange(t.tm_year, t.tm_mon)[1]
        else:
            day = t.tm_mday
        result = '%04d%02d%02d' % (t.tm_year, t.tm_mon, day)
    return date if result is None else result
# Rewrite every date-like run, keeping only its final date (group 2) in normalised form.
test_str = p.sub(lambda m: changedate(m.group(2)), test_str)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(test_str)
Regex Demo
Input
Format Example Input String
MMDDYYYY foo.bar.02242015.txt
MMDDYY foo.bar.022415.txt
MONCCYY foo.bar.FEB2015.txt
YYYY-MM-DD foo_bar_2015-02-01_2015-02-28.txt
YYYYMMDD foo_bar_20150224.txt
MM_YY foo_bar_02_15.txt
YYYYMMDD foo_bar_20150224.txt
Output:
Format Example Input String
MMDDYYYY foo.bar.20150224.txt
MMDDYY foo.bar.20150224.txt
MONCCYY foo.bar.20150228.txt
YYYY-MM-DD foo_bar_20150228.txt
YYYYMMDD foo_bar_20150224.txt
MM_YY foo_bar_20150228.txt
YYYYMMDD foo_bar_20150224.txt
Explanation:
E.g.
Input
foo_bar_2015-02-01_2015-02-28.txt
So
(?<=\.|_)([A-Z\d+_-]*?([A-Z\d+_-]{0,10}))(?=\.)
Regex to capture date string in to group m
1. [182-203] `2015-02-01_2015-02-28`
2. [193-203] `2015-02-28`
m.group(0) = 2015-02-01_2015-02-28
m.group(1) = 2015-02-01_2015-02-28
m.group(2) = 2015-02-28
Then
lambda m: changedate(m.group(2)) to re-format datetime
So
2015-02-28 can not pass others as
try:
    t = time.strptime(date,'%m%d%Y')
except:
    pass
But pass only this block
try:
    t = time.strptime(date,'%Y-%m-%d')
except:
    pass
Then format it
try:
r = time.strftime("%Y%m%d",t)
return r
except:
pass
UPDATE2 Please try the following approach (python 2.7):
import re
import calendar
INPUT = ['foo.bar.02242015.txt',
         'foo.bar.022415.txt',
         'foo.bar.FEB2015.txt',
         'foo_bar_2015-02-01_2015-02-28.txt',
         'foo_bar_20150224.txt',
         'foo_bar_02_15.txt',
         'foo_bar_20150224.txt']
# MMDD followed by a 2- or 4-digit year; groups are (month, day, year).
P1 = r'(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])((?:19|20)?\d{2})'
# Three alternatives: MONYYYY | YYYY-MM-DD with an optional trailing '_' | MM_YY.
P2 = r'[A-Z]{3}[12]\d{3}|[12]\d{3}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])_?|(?:0[1-9]|1[0-2])_[12]\d'
MONTHS = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']


def StrFormat(date_string):
    """Rewrite the date(s) embedded in *date_string* as a single ``YYYYMMDD``.

    Pass 1 handles the P2 layouts, told apart by the length of each match;
    pass 2 handles the MMDDYY / MMDDYYYY layouts matched by P1.  Every
    substitution replaces the first remaining match only, so the matches are
    consumed in order.

    :param date_string: file name containing a date
    :return: the file name with its date normalised
    """
    m2 = re.findall(P2, date_string)
    if m2:
        for m in m2:
            if len(m) == 5:
                # MM_YY: assume the 2000s and use the last day of the month.
                month, year = m.split('_')[0], '20' + m.split('_')[1]
                last_day = calendar.monthrange(int(year), int(month))[1]
                date_string = re.sub(P2, year + month + str(last_day), date_string, count=1)
            elif len(m) == 7:
                # MONYYYY: month abbreviation -> two-digit number, last day of the month.
                month, year = str(MONTHS.index(m[0:3]) + 1).zfill(2), m[3:]
                last_day = calendar.monthrange(int(year), int(month))[1]
                date_string = re.sub(P2, year + month + str(last_day), date_string, count=1)
            elif len(m) == 10:
                # YYYY-MM-DD without a trailing '_': the date to keep.
                date_string = re.sub(P2, m.replace('-', ''), date_string, count=1)
            elif len(m) > 5:
                # YYYY-MM-DD_ (11 chars): the first date of a pair, dropped.
                date_string = re.sub(P2, '', date_string, count=1)
    m1 = re.findall(P1, date_string)
    if m1:
        for m in m1:
            if len(m[2]) == 2:
                # MMDDYY -> 20YYMMDD
                date_string = re.sub(P1, r'20\3\1\2', date_string, count=1)
            elif len(m[2]) == 4:
                # MMDDYYYY -> YYYYMMDD
                date_string = re.sub(P1, r'\3\1\2', date_string, count=1)
            elif len(m) > 2:
                # Fallback kept from the original; the year group of P1 is
                # always 2 or 4 characters long, so it is not expected to run.
                date_string = re.sub(P1, '', date_string, count=1)
    return date_string
# Show each input next to its normalised form.  The line is built as one
# string so the output is the same on Python 2.7 and Python 3.
for i in INPUT:
    print('%s -> %s' % (i.ljust(35), StrFormat(i).rjust(20)))
Output:
foo.bar.02242015.txt -> foo.bar.20150224.txt
foo.bar.022415.txt -> foo.bar.20150224.txt
foo.bar.FEB2015.txt -> foo.bar.20150228.txt
foo_bar_2015-02-01_2015-02-28.txt -> foo_bar_20150228.txt
foo_bar_20150224.txt -> foo_bar_20150224.txt
foo_bar_02_15.txt -> foo_bar_20150228.txt
foo_bar_20150224.txt -> foo_bar_20150224.txt
Btw: It is indeed 10% Regex + 90% programming as suggested by noob :-)

Convert CreationTime of PDF to a readable format in Python

I'm working on PDF with Python and I'm accessing the file's meta data by using PDFMiner. I extract the info using this:
from pdfminer.pdfparser import PDFParser, PDFDocument
# Open the PDF in binary mode for the parser.
# NOTE(review): fp is never closed in this snippet.
fp = open('diveintopython.pdf', 'rb')
parser = PDFParser(fp)
doc = PDFDocument()
# Link parser and document in both directions before initializing the document.
parser.set_document(doc)
doc.set_parser(parser)
doc.initialize()
# Print the CreationDate entry of the first info record (Python 2 print statement).
print doc.info[0]['CreationDate']
# And return this value "D:20130501200439+01'00'"
How can I convert D:20130501200439+01'00' into a readable format in Python?
I found the format documented here. I needed to cope with the timezones too because I have 160k documents from all over to deal with. Here is my full solution:
import datetime
import re
from dateutil.tz import tzutc, tzoffset
# Pieces of a PDF date string, (D:YYYYMMDDHHmmSSOHH'mm'), joined into one pattern.
pdf_date_pattern = re.compile(''.join([
    r"(D:)?",
    r"(?P<year>\d\d\d\d)",
    r"(?P<month>\d\d)",
    r"(?P<day>\d\d)",
    r"(?P<hour>\d\d)",
    r"(?P<minute>\d\d)",
    r"(?P<second>\d\d)",
    # '-' comes first so it is a literal: "[+-zZ]" would be the range '+'..'z',
    # which also matches digits and letters.
    r"(?P<tz_offset>[-+zZ])?",
    r"(?P<tz_hour>\d\d)?",
    r"'?(?P<tz_minute>\d\d)?'?"]))
def transform_date(date_str):
    """
    Convert a pdf date such as "D:20120321183444+07'00'" into a usable datetime
    http://www.verypdf.com/pdfinfoeditor/pdf-date-format.htm
    (D:YYYYMMDDHHmmSSOHH'mm')
    :param date_str: pdf date string
    :return: timezone-aware datetime object, or None when date_str does not
             match pdf_date_pattern
    """
    match = re.match(pdf_date_pattern, date_str)
    if not match:
        return None
    date_info = match.groupdict()
    # Take the timezone parts out; what is left maps onto datetime's arguments.
    tz_offset = date_info.pop('tz_offset')
    # A sign given without hours or minutes counts as a zero offset.
    tz_hour = int(date_info.pop('tz_hour') or 0)
    tz_minute = int(date_info.pop('tz_minute') or 0)
    if tz_offset is None or tz_offset.lower() == 'z':  # UTC
        tzinfo = tzutc()
    else:
        multiplier = 1 if tz_offset == '+' else -1
        tzinfo = tzoffset(None, multiplier * (3600 * tz_hour + 60 * tz_minute))
    # .items() works on both Python 2 and Python 3 (iteritems is Python 2 only).
    fields = dict((k, int(v)) for k, v in date_info.items())
    return datetime.datetime(tzinfo=tzinfo, **fields)
Is "+01'00'" the timezone information? Not taking that into account, you can create a datetime object as follows...
>>>from time import mktime, strptime
>>>from datetime import datetime
...
>>>datestring = doc.info[0]['CreationDate'][2:-7]
>>>ts = strptime(datestring, "%Y%m%d%H%M%S")
>>>dt = datetime.fromtimestamp(mktime(ts))
datetime(2013, 5, 1, 20, 4, 39)
use Python 3's datetime.strptime; just remove the apostrophes first:
from datetime import datetime
# PDF-style stamp: "D:" prefix, 14 digits, then a UTC offset with quoted parts.
creation_date = "D:20130501200439+01'00'"
# Without the apostrophes the offset reads "+0100", which %z can parse.
dt = datetime.strptime(
    "".join(creation_date.split("'")),
    "D:%Y%m%d%H%M%S%z",
)
print(repr(dt))
# datetime.datetime(2013, 5, 1, 20, 4, 39, tzinfo=datetime.timezone(datetime.timedelta(seconds=3600)))
print(dt.isoformat())
# 2013-05-01T20:04:39+01:00
once you have a datetime object, you can format back to string however you like for a "readable" output, see strptime/strftime directives.
Guess I don't have the rep to comment on Paul Whipp's illustrative answer, but I've amended it to handle a form of the Y2K bug present in some of my old files. The year 2000 was written 19100, so the relevant line of pdf_date_pattern became
r"(?P<year>191\d\d|\d\d\d\d)",
and I added an elif to the transform values loop:
elif k == 'year' and len(v) == 5:
date_info[k] = int('20' + v[3:])

change (eg) 8 to 08...python

I am reading data from a csv file, and there are date elements in it, but there is an inconsistency in the dates.
For example: sometimes the date element is like 1/1/2011 and sometimes it is like 01/01/2011
Since I am plotting this data later.. this causes a great deal of noise in my plots. The following is my date class. Can you help me out in where and how to modify the code in order to get the date in the form 01/01/2011
import re
class Date:
    """Split a ``m/d/yyyy`` or ``m/d/yyyy h:mm`` string into integer fields.

    All fields are None until setDate() is called.
    """

    def __init__(self, input_date):
        self._input_date = input_date
        self._date = None
        self._month = None
        self._year = None
        self._hour = None
        self._min = None

    def setDate(self):
        """Parse the input string and fill in the integer fields.

        The first three components are read as month, day, year.  Hour and
        minute are set only when exactly five components are present.
        Raises AssertionError when fewer than three components are found and
        ValueError when a component is not an integer.
        """
        date = self.__mySplit()
        assert (len(date) >= 3)  # has at least mm/dd/yy
        self._month = int(date[0])
        self._date = int(date[1])
        self._year = int(date[2])
        if len(date) == 5:
            self._hour = int(date[3])
            self._min = int(date[4])

    def __mySplit(self):
        """Split the input on spaces, '/' and ':' and return the list of pieces."""
        res = [self._input_date]
        seps = [' ', '/', ':']
        # Apply one separator at a time to every piece produced so far.
        for sep in seps:
            s, res = res, []
            for seq in s:
                res += seq.split(sep)
        return res
Thanks
You definitely want to be using datetime. Here's some code that will get a datetime from either string type:
from datetime import datetime
def strToDatetime(dateStr):
    """Parse a day/month/year string into a datetime.

    Padded and unpadded input ("01/03/2011" and "1/3/2011") are both accepted.
    Raises ValueError when *dateStr* does not match ``%d/%m/%Y``.
    """
    return datetime.strptime(dateStr, "%d/%m/%Y")
Then, you can print a datetime out in the format you want with:
strToDatetime("1/3/2011").strftime("%d/%m/%Y")
>'01/03/2011'
You should never need to roll your own date/time/datetime structure in python.

Categories