Finding the previous month - python

I've seen some methods using the dateutil module to do this, but is there a way to do this using just the built-in libs?
For example, the current month right now is July. I can do this using the datetime.now() function.
What would be the easiest way for python to return the previous month?

It's very easy:
>>> previous_month = datetime.now().month - 1
>>> if previous_month == 0:
... previous_month = 12

You can use the calendar module
>>> from calendar import month_name, month_abbr
>>> d = datetime.now()
>>> month_name[d.month - 1] or month_name[-1]
'June'
>>> month_abbr[d.month - 1] or month_abbr[-1]
'Jun'
>>>

If you just want it as a string then do below process.
import datetime
months =(" Blank", "December", "January", "February", "March", "April",
"May","June", "July","August","September","October","November")
d = datetime.date.today()
print(months[d.month])

Generalized function finding the year and month, based on a month delta:
# %% function
def get_year_month(ref_date, month_delta):
    """Return the ``(year, month)`` lying ``month_delta`` months from ``ref_date``.

    Args:
        ref_date: Object exposing integer ``year`` and ``month`` attributes
            (for example a ``datetime.date``). The day is not used.
        month_delta: Number of months to shift. Negative values go back in
            time, so ``-1`` yields the previous month.

    Returns:
        A ``(year, month)`` tuple with ``month`` in the range 1..12.
    """
    # Shift a zero-based month and let divmod split the result into whole
    # years and a month index. Floor division makes negative deltas wrap
    # backwards correctly (January - 1 -> December of the previous year).
    year_delta, month_index = divmod(ref_date.month - 1 + month_delta, 12)
    year = ref_date.year + year_delta
    month = month_index + 1
    return year, month
# %% test
some_date = date(2022, 5, 31)
for delta in range(-12, 12):
year, month = get_year_month(some_date, delta)
print(f"{delta=}, {year=}, {month=}")
delta=-12, year=2021, month=5
delta=-11, year=2021, month=6
delta=-10, year=2021, month=7
delta=-9, year=2021, month=8
delta=-8, year=2021, month=9
delta=-7, year=2021, month=10
delta=-6, year=2021, month=11
delta=-5, year=2021, month=12
delta=-4, year=2022, month=1
delta=-3, year=2022, month=2
delta=-2, year=2022, month=3
delta=-1, year=2022, month=4
delta=0, year=2022, month=5
delta=1, year=2022, month=6
delta=2, year=2022, month=7
delta=3, year=2022, month=8
delta=4, year=2022, month=9
delta=5, year=2022, month=10
delta=6, year=2022, month=11
delta=7, year=2022, month=12
delta=8, year=2023, month=1
delta=9, year=2023, month=2
delta=10, year=2023, month=3
delta=11, year=2023, month=4

If you want a date object:
import datetime
# Subtracting a fixed 30 days is not reliable: on the 31st it stays in the
# current month, and on 1 March of a non-leap year it lands in January.
# Stepping back one day from the first of the current month always yields
# the last day of the previous month.
first_of_this_month = datetime.date.today().replace(day=1)
d = first_of_this_month - datetime.timedelta(days=1)
# >>> d   (when run in July 2015)
# datetime.date(2015, 6, 30)

Related

Whats wrong with this code for checking age?

I want to know if inputed date of birth is over 18 or under.
# Question's original age check: True when `birth` is treated as less than
# 18 years before today's date.
def is_under_18(birth):
now = date.today()
return (
# Fewer than 18 calendar years apart: under 18 regardless of month/day.
now.year - birth.year < 18
# Exactly 18 calendar years apart: decide by month, then by day.
or now.year - birth.year == 18 and (
now.month < birth.month
# `<=` means the 18th birthday itself still counts as under 18.
or now.month == birth.month and now.day <= birth.day
)
)
And then:
year = int(input("Year born: "))
month = int(input("Month born: "))
day = int(input("Day born: "))
birth = date(year,month,day)
if is_under_18(birth):
print('Under 18')
else:
print('Adult')
However, the only thing is, say I add a user whose birthday is the 25th of November 2004. The program lets me add it because it does not take the month into account. If I add a user who was born on the 1st of January 2005, it doesn't allow me because 2022-2005=17.
Your original code doesn't seem to have a problem with the dates you mention, but does have a bug as Nov 22, 2004 is "Under 18" and today's date is Nov 22, 2022 (18th birthday). Use now.day < birth.day instead.
But if you compute the birthday required to be 18 by replacing today's year with 18 less, then directly compare the dates, you don't have to have a complicated comparison:
from datetime import date
def is_under_18(birth, today=None):
    """Return True if a person born on ``birth`` has not yet turned 18.

    Args:
        birth: ``datetime.date`` of birth.
        today: Reference date for the check. Defaults to the fixed date
            2022-11-22 so the example output is repeatable; pass
            ``date.today()`` for real use.

    Returns:
        ``True`` when the 18th birthday lies after ``today``. The 18th
        birthday itself counts as adult.
    """
    if today is None:
        # today = date.today()
        today = date(2022, 11, 22)  # for repeatability of results
    try:
        born_on_or_before = today.replace(year=today.year - 18)
    except ValueError:
        # today is 29 February and the year 18 years earlier is never a leap
        # year, so fall back to 28 February: anyone born after that date has
        # not had their 18th birthday yet.
        born_on_or_before = today.replace(year=today.year - 18, day=28)
    return birth > born_on_or_before
print(f'Today is {date.today()}')
for year,month,day in [(2004,11,21), (2004,11,22), (2004,11,23), (2004,11,25), (2005,1,1)]:
birth = date(year,month,day)
if is_under_18(birth):
print(f'{birth} Under 18')
else:
print(f'{birth} Adult')
Output:
Today is 2022-11-22
2004-11-21 Adult
2004-11-22 Adult
2004-11-23 Under 18
2004-11-25 Under 18
2005-01-01 Under 18

(python) list of datetime to string

working on list of datetime to string
most of the examples used datetime.strptime('Jun 1 2005', '%b %d %Y').date()
Convert string "Jun 1 2005 1:33PM" into datetime
which can only take one input at a time, but I am receiving an entire list of strings such as
customer_date_list = ['2011-06-2', '2011-08-05', '2011-02-04', '2010-01-14', '2010-12-13', '2010-01-12', '2010-2-11', '2010-02-07', '2010-12-02', '2011-11-30']
my expected output is
['2010-01-12', '2010-01-14', '2010-02-07', '2010-02-11', '2010-12-02', '2010-12-13', '2011-02-04', '2011-06-02', '2011-08-05', '2011-11-30']
the code below:
I'm trying to make either of these lines work:
list1_date_string = [datetime.strftime(fs, "%Y, %m, %d, %H, %M") for fs in list1_date]
dateStr = list1_date.strftime("%Y, %m, %d, %H, %M")
the overall code
import datetime
# Question's attempt: parse the "%Y-%m-%d" strings and order them with a
# hand-written adjacent-swap loop.
def date_sorting_operation(input_list):
# Parse every input string into a datetime object.
list1_date = [datetime.datetime.strptime(ts, "%Y-%m-%d") for ts in input_list]
# NOTE(review): both loops use the name `i`; indentation was lost in this
# listing, presumably the second loop is nested inside the first.
for i in range(len(list1_date)):
for i in range(len(list1_date) - 1):
# Swap neighbouring elements that are out of order.
if list1_date[i] > list1_date[i + 1]:
temporary = list1_date[i + 1]
list1_date[i + 1] = list1_date[i]
list1_date[i] = temporary
# Commented-out attempts to format the datetimes back into strings.
#list1_date_string = [datetime.strftime(fs, "%Y, %m, %d, %H, %M") for fs in list1_date]
#dateStr = list1_date.strftime("%Y, %m, %d, %H, %M")
# Returns the datetime objects together with the container type, not strings.
return list1_date, type(list1_date)
customer_date_list = ['2011-06-2', '2011-08-05', '2011-02-04', '2010-01-14', '2010-12-13', '2010-01-12', '2010-2-11', '2010-02-07', '2010-12-02', '2011-11-30']
print (date_sorting_operation(customer_date_list))
the code and output picture:
You can sort the dates by converting them to datetime objects using a lambda (inline) function and using the converted datetime objects as the key for sorting.
from datetime import datetime
customer_date_list = ['2011-06-2', '2011-08-05', '2011-02-04', '2010-01-14', '2010-12-13', '2010-01-12', '2010-2-11', '2010-02-07', '2010-12-02', '2011-11-30']
customer_date_list.sort(key = lambda date: datetime.strptime(date, '%Y-%m-%d'))
print(customer_date_list)
# output : ['2010-01-12', '2010-01-14', '2010-02-07', '2010-2-11', '2010-12-02', '2010-12-13', '2011-02-04', '2011-06-2', '2011-08-05', '2011-11-30']
Given that the dates you receive are in the format YYYY-MM-DD... Why not simply sort them as strings if you just want to order them?
sorted(customer_date_list)
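Would give you the output you want, provided every date is zero-padded (YYYY-MM-DD). Note that the sample list contains non-padded entries: as a plain string '2010-2-11' sorts after '2010-12-13', and neither it nor '2011-06-2' is re-padded in the result.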
You can try:
from datetime import datetime
customer_date_list = ['2011-06-2', '2011-08-05', '2011-02-04', '2010-01-14', '2010-12-13', '2010-01-12', '2010-2-11', '2010-02-07', '2010-12-02', '2011-11-30']
# using only text functions
sorted(['-'.join([y.zfill(2) for y in x.split('-')]) for x in customer_date_list])
# with date conversion
sorted([datetime.strptime(x, '%Y-%m-%d').strftime('%Y-%m-%d') for x in customer_date_list])

pandas Calendar issue

This code for CustomBusinessDay() works fine:
from datetime import datetime
from pandas.tseries.offsets import CustomBusinessDay
runday = datetime(2021,12,30).date()
nextday = (runday + CustomBusinessDay()).date()
output 1:
In [26]: nextday
Out[26]: datetime.date(2021, 12, 31)
However, when adding an optional calendar as in date functionality , it produces the next business day even though today's date (Dec 31, 2021) is not a holiday according to a specified calendar below:
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, \
USMemorialDay, USMartinLutherKingJr, USPresidentsDay, GoodFriday, \
USLaborDay, USThanksgivingDay, nearest_workday
class NYSECalendar(AbstractHolidayCalendar):
''' NYSE holiday calendar via pandas '''
rules = [
Holiday('New Years Day', month=1, day=1, observance=nearest_workday),
USMartinLutherKingJr,
USPresidentsDay,
GoodFriday,
USMemorialDay,
Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday),
USLaborDay,
USThanksgivingDay,
Holiday('Christmas', month=12, day=25, observance=nearest_workday),
]
nextday = (runday + CustomBusinessDay(calendar=NYSECalendar())).date()
output2:
In [27]: nextday
Out[27]: datetime.date(2022, 1, 3)
This line is the location of your problem:
Holiday('New Years Day', month=1, day=1, observance=nearest_workday)
If you take a look at the source code, nearest_workday means that the holiday is observed on a Friday if it falls on a Saturday, and on a Monday if the holiday falls on a Sunday. Since New Year's Day 2022 falls on a Saturday, it is observed today (12/31/2021) according to your calendar.
Removing the observance parameter will lead to an output of 2021-12-31.

Get the Month with year including one character in python

I'm trying to get the months list with year like [
"2019M10",
"2019M11",
"2019M12",
"2020M01",
"2020M02",
"2020M03",
"2020M04",
"2020M05",
"2020M06",
"2020M07",
"2020M08",
"2020M09",
"2020M10",
"2020M11",
"2020M12",
"2021M01",
"2021M02",
"2021M03",
"2021M04",
"2021M05"
]
Following Code Sample I'm using
import datetime
import json
from_year = 2018
last_year = datetime.datetime.now().year
print(last_year)
year_list = list(range(from_year, last_year))
new_month = []
for all_year in year_list:
all_months = [str(all_year)+'M'+str(i) for i in list(range(1,13))]
all_months.extend(all_months)
print(all_months )
months = json.dumps(all_months)
print(months)
I'm not getting the desired output.
Use Timestamp.to_period for actual year and month, create PeriodIndex by period_range and then convert values to format YYYYMmm by PeriodIndex.strftime:
from_year = 2018
last_year = pd.to_datetime('now').to_period('m')
print(last_year)
2021-07
months = pd.period_range(from_year, last_year, freq='M').strftime('%YM%m').tolist()
print (months)
['2018M01', '2018M02', '2018M03', '2018M04', '2018M05', '2018M06', '2018M07', '2018M08',
'2018M09', '2018M10', '2018M11', '2018M12', '2019M01', '2019M02', '2019M03', '2019M04',
'2019M05', '2019M06', '2019M07', '2019M08', '2019M09', '2019M10', '2019M11', '2019M12',
'2020M01', '2020M02', '2020M03', '2020M04', '2020M05', '2020M06', '2020M07', '2020M08',
'2020M09', '2020M10', '2020M11', '2020M12', '2021M01', '2021M02', '2021M03', '2021M04',
'2021M05', '2021M06', '2021M07']
If you need all months of the current year, add one to the year and then slice off the last value of months:
from_year = 2018
last_year = pd.to_datetime('now').year + 1
print(last_year)
2022
months = pd.period_range(from_year, last_year, freq='M')[:-1].strftime('%YM%m').tolist()
print (months)
['2018M01', '2018M02', '2018M03', '2018M04', '2018M05', '2018M06', '2018M07', '2018M08',
'2018M09', '2018M10', '2018M11', '2018M12', '2019M01', '2019M02', '2019M03', '2019M04',
'2019M05', '2019M06', '2019M07', '2019M08', '2019M09', '2019M10', '2019M11', '2019M12',
'2020M01', '2020M02', '2020M03', '2020M04', '2020M05', '2020M06', '2020M07', '2020M08',
'2020M09', '2020M10', '2020M11', '2020M12', '2021M01', '2021M02', '2021M03', '2021M04',
'2021M05', '2021M06', '2021M07', '2021M08', '2021M09', '2021M10', '2021M11', '2021M12']
Your solution with nested list comprehension with flatten:
from_year = 2018
last_year = datetime.datetime.now().year
print(last_year)
# 2021
# range() excludes its stop value, so add 1 to include the current year;
# the output shown below runs through December of the current year.
year_list = list(range(from_year, last_year + 1))
# Flattened nested comprehension: one zero-padded "YYYYMmm" label per month.
months = [f'{all_year}M{i:02}' for all_year in year_list for i in range(1, 13)]
print (months)
['2018M01', '2018M02', '2018M03', '2018M04', '2018M05', '2018M06', '2018M07', '2018M08',
'2018M09', '2018M10', '2018M11', '2018M12', '2019M01', '2019M02', '2019M03', '2019M04',
'2019M05', '2019M06', '2019M07', '2019M08', '2019M09', '2019M10', '2019M11', '2019M12',
'2020M01', '2020M02', '2020M03', '2020M04', '2020M05', '2020M06', '2020M07', '2020M08',
'2020M09', '2020M10', '2020M11', '2020M12', '2021M01', '2021M02', '2021M03', '2021M04',
'2021M05', '2021M06', '2021M07', '2021M08', '2021M09', '2021M10', '2021M11', '2021M12']
You are creating a new list on every iteration of the loop and then extending it with itself. So the data from the previous years is discarded, and the data for the latest year appears twice.
The solution given by @jezrael is the most efficient, but you can also make these modifications to your code:
import datetime
import json
from_year = 2018
last_year = datetime.datetime.now().year
print(last_year)
# range() excludes its stop value, so add 1 to include the current year.
year_list = list(range(from_year, last_year + 1))
print(year_list)
all_months = []
for all_year in year_list:
    # Build this year's labels in a separate list so that extending the
    # accumulator neither discards earlier years nor duplicates the latest.
    # The month is zero-padded to match the requested "2018M01" format.
    new_all_months = [f'{all_year}M{i:02}' for i in range(1, 13)]
    all_months.extend(new_all_months)

Get the format in dateutil.parse

Is there a way to get the "format" after parsing a date in dateutil. For example something like:
>>> x = parse("2014-01-01 00:12:12")
datetime.datetime(2014, 1, 1, 0, 12, 12)
x.get_original_string_format()
YYYY-MM-DD HH:MM:SS # %Y-%m-%d %H:%M:%S
# Or, passing the date-string directly
get_original_string_format("2014-01-01 00:12:12")
YYYY-MM-DD HH:MM:SS # %Y-%m-%d %H:%M:%S
Update: I'd like to add a bounty to this question to see if someone could add an answer that would do the equivalent on getting the string-format of a common date-string passed. It can use dateutil if you want, but it doesn't have to. Hopefully we'll get some creative solutions here.
Is there a way to get the "format" after parsing a date in dateutil?
Not possible with dateutil. The problem is that dateutil never has the format as an intermediate result any time during the parsing as it detects separate components of the datetime separately - take a look at this not quite easy to read source code.
I don't know of a way that you can return the parsed format from dateutil (or any other python timestamp parser that I know of).
Implementing your own timestamp parsing function that returns a list of possible formats and related datetime objects is fairly trivial using datetime.strptime() but doing it efficiently against a broadly useful list of possible timestamp formats is not.
The following example utilizes a list of just over 100 formats. It does not even scratch the surface of the wide variety of formats parsed by dateutil. It tests each format in sequence until it exhausts all formats in the list (likely much less efficient than the dateutil approach of locating the various datetime parts independently as noted in the answer from @alecxe).
In addition, I have included some example timestamp formats that include time zone names (instead of offsets). If you run the example function below against those particular datetime strings, you may find that it does not return the expected matches even though I have included matching formats using the %Z directive. Some explanation for the challenges with using %Z to handle time zone names can be found in issue 22377 at bugs.python.org (just to highlight another non-trivial aspect of implementing your own datetime parsing function).
With all of those caveats, if you are dealing with a manageable set of potential formats, implementing something simple like the below may get you what you need.
Example function that attempts to match a datetime string against a list of formats and return a dict that includes the original datestring and a list of matches, each a dict that includes a datetime object along with the matched format:
from datetime import datetime
def parse_timestamp(datestring, formats):
    """Try every format in ``formats`` against ``datestring`` and collect the matches.

    Args:
        datestring: The timestamp text to parse.
        formats: Iterable of ``strptime`` format strings, tried in order.

    Returns:
        A dict ``{'datestring': datestring, 'matches': [...]}`` where each
        match is ``{'datetime': <parsed datetime>, 'format': <format>}``,
        in the order the formats were supplied. ``matches`` is empty when
        no format fits.
    """
    results = {'datestring': datestring, 'matches': []}
    for f in formats:
        try:
            d = datetime.strptime(datestring, f)
        except ValueError:
            # strptime signals "string does not fit this format" (and bad
            # directives) with ValueError; anything else is a real error
            # and must not be silently swallowed.
            continue
        results['matches'].append({'datetime': d, 'format': f})
    return results
Example formats and datetime strings:
formats = ['%A, %B %d, %Y', '%A, %B %d, %Y %I:%M:%S %p %Z', '%A, %d %B %Y', '%B %d %Y', '%B %d, %Y', '%H:%M:%S', '%H:%M:%S,%f', '%H:%M:%S.%f', '%Y %b %d %H:%M:%S.%f', '%Y %b %d %H:%M:%S.%f %Z', '%Y %b %d %H:%M:%S.%f*%Z', '%Y%m%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S %z', '%Y-%m-%d %H:%M:%S%z', '%Y-%m-%d %H:%M:%S,%f', '%Y-%m-%d %H:%M:%S,%f%z', '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S.%f%z', '%Y-%m-%d %I:%M %p', '%Y-%m-%d %I:%M:%S %p', '%Y-%m-%d*%H:%M:%S', '%Y-%m-%d*%H:%M:%S:%f', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S%Z', '%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S*%f%z', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S.%f%z', '%Y/%m/%d', '%Y/%m/%d*%H:%M:%S', '%a %b %d %H:%M:%S %Z %Y', '%a, %d %b %Y %H:%M:%S %z', '%b %d %H:%M:%S', '%b %d %H:%M:%S %Y', '%b %d %H:%M:%S %z', '%b %d %H:%M:%S %z %Y', '%b %d %Y', '%b %d %Y %H:%M:%S', '%b %d, %Y', '%b %d, %Y %I:%M:%S %p', '%b.%d.%Y', '%d %B %Y', '%d %B %Y %H:%M:%S %Z', '%d %b %Y %H:%M:%S', '%d %b %Y %H:%M:%S %z', '%d %b %Y %H:%M:%S*%f', '%d%m_%H:%M:%S', '%d%m_%H:%M:%S.%f', '%d-%b-%Y', '%d-%b-%Y %H:%M:%S', '%d-%b-%Y %H:%M:%S.%f', '%d-%b-%Y %I:%M:%S %p', '%d-%m-%Y', '%d-%m-%Y %I:%M %p', '%d-%m-%Y %I:%M:%S %p', '%d-%m-%y', '%d-%m-%y %I:%M %p', '%d-%m-%y %I:%M:%S %p', '%d/%b %H:%M:%S,%f', '%d/%b/%Y %H:%M:%S', '%d/%b/%Y %I:%M %p', '%d/%b/%Y:%H:%M:%S', '%d/%b/%Y:%H:%M:%S %z', '%d/%m/%Y', '%d/%m/%Y %H:%M:%S %z', '%d/%m/%Y %I:%M %p', '%d/%m/%Y %I:%M:%S %p', '%d/%m/%Y %I:%M:%S %p:%f', '%d/%m/%Y*%H:%M:%S', '%d/%m/%Y*%H:%M:%S*%f', '%d/%m/%y', '%d/%m/%y %H:%M:%S', '%d/%m/%y %H:%M:%S %z', '%d/%m/%y %I:%M %p', '%d/%m/%y %I:%M:%S %p', '%d/%m/%y*%H:%M:%S', '%m%d_%H:%M:%S', '%m%d_%H:%M:%S.%f', '%m-%d-%Y', '%m-%d-%Y %I:%M %p', '%m-%d-%Y %I:%M:%S %p', '%m-%d-%y', '%m-%d-%y %I:%M %p', '%m-%d-%y %I:%M:%S %p', '%m/%d/%Y', '%m/%d/%Y %H:%M:%S %z', '%m/%d/%Y %I:%M %p', '%m/%d/%Y %I:%M:%S %p', '%m/%d/%Y %I:%M:%S %p:%f', '%m/%d/%Y*%H:%M:%S', '%m/%d/%Y*%H:%M:%S*%f', '%m/%d/%y', '%m/%d/%y %H:%M:%S', '%m/%d/%y %H:%M:%S %z', '%m/%d/%y %I:%M %p', '%m/%d/%y %I:%M:%S 
%p', '%m/%d/%y*%H:%M:%S', '%y%m%d %H:%M:%S', '%y-%m-%d %H:%M:%S', '%y-%m-%d %H:%M:%S,%f', '%y-%m-%d %H:%M:%S,%f %z', '%y/%m/%d %H:%M:%S']
datestrings = ['03-11-1999', '03-12-1999 5:06 AM', '03-12-1999 5:06:07 AM', '03-12-99 5:06 AM', '03-12-99 5:06:07 AM', '03/12/1999', '03/12/1999 5:06 AM', '03/12/1999 5:06:07 AM', '03/12/99 5:06 AM', '03/12/99 5:06:07', '03/12/99 5:06:07 AM', '04/23/17 04:34:22 +0000', '0423_11:42:35', '0423_11:42:35.883', '05/09/2017*08:22:14*612', '06/01/22 04:11:05', '08/10/11*13:33:56', '10-04-19 12:00:17', '10-06-26 02:31:29,573', '10/03/2017 07:29:46 -0700', '11-02-11 16:47:35,985 +0000', '11/22/2017*05:13:11', '11:42:35', '11:42:35,173', '11:42:35.173', '12/03/1999', '12/03/1999 5:06 AM', '12/03/99 5:06 AM', '12/3/1999', '12/3/1999 5:06 AM', '12/3/1999 5:06:07 AM', '150423 11:42:35', '19/Apr/2017:06:36:15 -0700', '1999-03-12 05:06:07.0', '1999-03-12 5:06 AM', '1999-03-12 5:06:07 AM', '1999-03-12+01:00', '1999-3-12 5:06 AM', '1999-3-12 5:06:07 AM', '1999/3/12', '20150423 11:42:35.173', '2017 Mar 03 05:12:41.211 PDT', '2017 Mar 10 01:44:20.392', '2017-02-11T18:31:44', '2017-03-10 14:30:12,655+0000', '2017-03-12 13:11:34.222-0700', '2017-03-12T17:56:22-0700', '2017-06-26 02:31:29,573', '2017-07-01T14:59:55.711+0000', '2017-07-04*13:23:55', '2017-07-22T16:28:55.444', '2017-08-19 12:17:55 -0400', '2017-08-19 12:17:55-0400', '2017-09-08T03:13:10', '2017-10-14T22:11:20+0000', '2017-10-30*02:47:33:899', '2017-11-22T10:10:15.455', '2017/04/12*19:37:50', '2018 Apr 13 22:08:13.211*PDT', '2018-02-27 15:35:20.311', '2018-08-20T13:20:10*633+0000', '22 Mar 1999 05:06:07 +0100', '22 March 1999', '22 March 1999 05:06:07 CET', '22-Mar-1999', '22-Mar-1999 05:06:07', '22-Mar-1999 5:06:07 AM', '22/03/1999 5:06:07 AM', '22/Mar/1999 5:06:07 +0100', '22/Mar/99 5:06 AM', '23 Apr 2017 10:32:35*311', '23 Apr 2017 11:42:35', '23-Apr-2017 11:42:35', '23-Apr-2017 11:42:35.883', '23/Apr 11:42:35,173', '23/Apr/2017 11:42:35', '23/Apr/2017:11:42:35', '3-11-1999', '3-12-1999 5:06 AM', '3-12-99 5:06 AM', '3-12-99 5:06:07 AM', '3-22-1999 5:06:07 AM', '3/12/1999', '3/12/1999 5:06 AM', '3/12/1999 5:06:07 AM', 
'3/12/99 5:06 AM', '3/12/99 5:06:07', '8/5/2011 3:31:18 AM:234', '9/28/2011 2:23:15 PM', 'Apr 20 00:00:35 2010', 'Dec 2, 2017 2:39:58 AM', 'Jan 21 18:20:11 +0000 2017', 'Jun 09 2018 15:28:14', 'Mar 16 08:12:04', 'Mar 22 1999', 'Mar 22, 1999', 'Mar 22, 1999 5:06:07 AM', 'Mar.22.1999', 'March 22 1999', 'March 22, 1999', 'Mon Mar 22 05:06:07 CET 1999', 'Mon, 22 Mar 1999 05:06:07 +0100', 'Monday, 22 March 1999', 'Monday, March 22, 1999', 'Monday, March 22, 1999 5:06:07 AM CET', 'Sep 28 19:00:00 +0000']
Example usage:
print(parse_timestamp('2018-08-20T13:20:10*633+0000', formats))
# OUTPUT
# {'datestring': '2018-08-20T13:20:10*633+0000', 'matches': [{'datetime': datetime.datetime(2018, 8, 20, 13, 20, 10, 633000, tzinfo=datetime.timezone.utc), 'format': '%Y-%m-%dT%H:%M:%S*%f%z'}]}
My idea was to:
Create an object that has a list of candidate specifiers you think might be in the date pattern (the more you add, the more possibilities you will get out the other end)
Parse the date string
Create a list of possible specifiers for each element in the string, based on the date and the list of candidates you supplied.
Recombine them to produce a list of 'possibles'.
If you get only a single candidate, you can be pretty sure it is the right format. But you will often get many possibilities (especially with dates, months, minutes and hours all in the 0-10 range).
Example class:
import re
from itertools import product
from dateutil.parser import parse
from collections import defaultdict, Counter
# strftime directives considered when no explicit list is supplied.
COMMON_SPECIFIERS = [
    '%a', '%A', '%d', '%b', '%B', '%m',
    '%Y', '%H', '%p', '%M', '%S', '%Z',
]


class FormatFinder:
    """Guess which strftime patterns could have produced a given date string."""

    def __init__(self,
                 valid_specifiers=COMMON_SPECIFIERS,
                 date_element=r'([\w]+)',
                 delimiter_element=r'([\W]+)',
                 ignore_case=False):
        """Configure the finder.

        Args:
            valid_specifiers: strftime directives to consider as candidates.
            date_element: Regex with one capture group matching a date token.
            delimiter_element: Regex with one capture group matching the
                text between date tokens.
            ignore_case: When true, also match upper- and lower-cased
                renderings of each directive's output.
        """
        self.specifiers = valid_specifiers
        # Two alternatives, each with one group: findall() then yields
        # (token, delimiter) pairs in which exactly one side is non-empty.
        joined = (r'' + date_element + r"|" + delimiter_element)
        self.pattern = re.compile(joined)
        self.ignore_case = ignore_case

    def find_candidate_patterns(self, date_string):
        """Yield every format string that could describe ``date_string``.

        The string is parsed with ``parse``; each configured directive is
        rendered for the parsed date, and every token of the input is mapped
        to the directives that render to the same text. Tokens with no
        matching directive are kept verbatim. Combinations that use the same
        directive more than once are skipped.

        Args:
            date_string: The date text to analyse.

        Yields:
            Candidate format strings.
        """
        date = parse(date_string)
        tokens = self.pattern.findall(date_string)

        # Rendered text -> directives that produce that text for this date.
        candidate_specifiers = defaultdict(list)
        for specifier in self.specifiers:
            token = date.strftime(specifier)
            candidate_specifiers[token].append(specifier)
            if self.ignore_case:
                # Alias the same list under both case variants so later
                # appends for this token are visible through every key.
                candidate_specifiers[token.upper()] = candidate_specifiers[token]
                candidate_specifiers[token.lower()] = candidate_specifiers[token]

        options_for_each_element = []
        for (token, delimiter) in tokens:
            if token:
                if token not in candidate_specifiers:
                    options_for_each_element.append(
                        [token])  # just use this verbatim?
                else:
                    options_for_each_element.append(
                        candidate_specifiers[token])
            else:
                options_for_each_element.append([delimiter])

        for parts in product(*options_for_each_element):
            counts = Counter(parts)
            # default=0 keeps an empty specifier list from raising in max().
            max_count = max(
                (counts[specifier] for specifier in self.specifiers),
                default=0)
            if max_count > 1:
                # this is a candidate with the same item used more than once
                continue
            yield "".join(parts)
And some sample tests:
def test_it_returns_value_from_question_1():
    """The format from the original question is among the candidates."""
    s = "2014-01-01 00:12:12"
    sut = FormatFinder()
    candidates = sut.find_candidate_patterns(s)
    assert "%Y-%m-%d %H:%M:%S" in candidates
def test_it_returns_value_from_question_2():
s = 'Jan. 04, 2017'
sut = FormatFinder()
candidates = sut.find_candidate_patterns(s)
candidates = list(candidates)
assert "%b. %d, %Y" in candidates
assert len(candidates) == 1
def test_it_can_ignore_case():
# NB: apparently the 'AM/PM' is meant to be capitalised in my locale!
# News to me!
s = "JANUARY 12, 2018 02:12 am"
sut = FormatFinder(ignore_case=True)
candidates = sut.find_candidate_patterns(s)
assert "%B %d, %Y %H:%M %p" in candidates
def test_it_returns_parts_that_have_no_date_component_verbatim():
# In this string, the 'at' is considered as a 'date' element,
# but there is no specifier that produces a candidate for it
s = "January 12, 2018 at 02:12 AM"
sut = FormatFinder()
candidates = sut.find_candidate_patterns(s)
assert "%B %d, %Y at %H:%M %p" in candidates
To make it a bit clearer, here's some example of using this code in an iPython shell:
In [2]: ff = FormatFinder()
In [3]: list(ff.find_candidate_patterns("2014-01-01 00:12:12"))
Out[3]:
['%Y-%d-%m %H:%M:%S',
'%Y-%d-%m %H:%S:%M',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%S:%M']
In [4]: list(ff.find_candidate_patterns("Jan. 04, 2017"))
Out[4]: ['%b. %d, %Y']
In [5]: list(ff.find_candidate_patterns("January 12, 2018 at 02:12 AM"))
Out[5]: ['%B %d, %Y at %H:%M %p', '%B %M, %Y at %H:%d %p']
In [6]: ff_without_case = FormatFinder(ignore_case=True)
In [7]: list(ff_without_case.find_candidate_patterns("JANUARY 12, 2018 02:12 am"))
Out[7]: ['%B %d, %Y %H:%M %p', '%B %M, %Y %H:%d %p']
Idea:
Inspect the user input date string, and build possible date format set
Loop over the format set, use datetime.strptime parse the date string with individual possible date format.
Format the date from step 2 with datetime.strftime, if the result equal to the origin date string, then this format is a possible date format.
Algorithm implementation
from datetime import datetime
import itertools
import re
FORMAT_CODES = (
r'%a', r'%A', r'%w', r'%d', r'%b', r'%B', r'%m', r'%y', r'%Y',
r'%H', r'%I', r'%p', r'%M', r'%S', r'%f', r'%z', r'%Z', r'%j',
r'%U', r'%W',
)
TWO_LETTERS_FORMATS = (
r'%p',
)
THREE_LETTERS_FORMATS = (
r'%a', r'%b'
)
LONG_LETTERS_FORMATS = (
r'%A', r'%B', r'%z', r'%Z',
)
# Directives whose output is a single digit.
SINGLE_DIGITS_FORMATS = (
    r'%w',  # weekday as a decimal number; the leading '%' was missing
)
TWO_DIGITS_FORMATS = (
r'%d', r'%m', r'%y', r'%H', r'%I', r'%M', r'%S', r'%U', r'%W',
)
THREE_DIGITS_FORMATS = (
r'%j',
)
FOUR_DIGITS_FORMATS = (
r'%Y',
)
LONG_DIGITS_FORMATS = (
r'%f',
)
# Non format code symbols
SYMBOLS = (
'-',
':',
'+',
'Z',
',',
' ',
)
if __name__ == '__main__':
date_str = input('Please input a date: ')
# Split with non format code symbols
pattern = r'[^{}]+'.format(''.join(SYMBOLS))
components = re.findall(pattern, date_str)
# Create a format placeholder, eg. '{}-{}-{} {}:{}:{}+{}'
placeholder = re.sub(pattern, '{}', date_str)
formats = []
for comp in components:
if re.match(r'^\d{1}$', comp):
formats.append(SINGLE_DIGITS_FORMATS)
elif re.match(r'^\d{2}$', comp):
formats.append(TWO_DIGITS_FORMATS)
elif re.match(r'^\d{3}$', comp):
formats.append(THREE_DIGITS_FORMATS)
elif re.match(r'^\d{4}$', comp):
formats.append(FOUR_DIGITS_FORMATS)
elif re.match(r'^\d{5,}$', comp):
formats.append(LONG_DIGITS_FORMATS)
elif re.match(r'^[a-zA-Z]{2}$', comp):
formats.append(TWO_LETTERS_FORMATS)
elif re.match(r'^[a-zA-Z]{3}$', comp):
formats.append(THREE_LETTERS_FORMATS)
elif re.match(r'^[a-zA-Z]{4,}$', comp):
formats.append(LONG_LETTERS_FORMATS)
else:
formats.append(FORMAT_CODES)
# Create a possible format set
possible_set = itertools.product(*formats)
found = 0
for possible_format in possible_set:
# Create a format with possible format combination
dt_format = placeholder.format(*possible_format)
try:
dt = datetime.strptime(date_str, dt_format)
# Use the format to parse the date, and format the
# date back to string and compare with the origin one
if dt.strftime(dt_format) == date_str:
print('Possible result: {}'.format(dt_format))
found += 1
except Exception:
continue
if found == 0:
print('No pattern found')
Usage:
$ python3 reverse.py
Please input a date: 2018-12-31 10:26 PM
Possible result: %Y-%d-%M %I:%S %p
Possible result: %Y-%d-%S %I:%M %p
Possible result: %Y-%m-%d %I:%M %p
Possible result: %Y-%m-%d %I:%S %p
Possible result: %Y-%m-%M %I:%d %p
Possible result: %Y-%m-%M %I:%S %p
Possible result: %Y-%m-%S %I:%d %p
Possible result: %Y-%m-%S %I:%M %p
Possible result: %Y-%H-%d %m:%M %p
Possible result: %Y-%H-%d %m:%S %p
Possible result: %Y-%H-%d %M:%S %p
Possible result: %Y-%H-%d %S:%M %p
Possible result: %Y-%H-%M %d:%S %p
Possible result: %Y-%H-%M %m:%d %p
Possible result: %Y-%H-%M %m:%S %p
Possible result: %Y-%H-%M %S:%d %p
Possible result: %Y-%H-%S %d:%M %p
Possible result: %Y-%H-%S %m:%d %p
Possible result: %Y-%H-%S %m:%M %p
Possible result: %Y-%H-%S %M:%d %p
Possible result: %Y-%I-%d %m:%M %p
Possible result: %Y-%I-%d %m:%S %p
Possible result: %Y-%I-%d %M:%S %p
Possible result: %Y-%I-%d %S:%M %p
Possible result: %Y-%I-%M %d:%S %p
Possible result: %Y-%I-%M %m:%d %p
Possible result: %Y-%I-%M %m:%S %p
Possible result: %Y-%I-%M %S:%d %p
Possible result: %Y-%I-%S %d:%M %p
Possible result: %Y-%I-%S %m:%d %p
Possible result: %Y-%I-%S %m:%M %p
Possible result: %Y-%I-%S %M:%d %p
Possible result: %Y-%M-%d %I:%S %p
Possible result: %Y-%M-%S %I:%d %p
Possible result: %Y-%S-%d %I:%M %p
Possible result: %Y-%S-%M %I:%d %p
My idea was to create a class something like this, might not be accurate
from datetime import datetime
import re
class DateTime(object):
    """Pair a parsed date with the strptime format that was used to parse it."""

    # Maps strptime directives to human-readable placeholders
    # (will contain all format equivalents).
    dateFormat = {"%d": "dd", "%Y": "YYYY", "%a": "Day", "%A": "DAY", "%w": "ww", "%b": "Mon", "%B": "MON", "%m": "mm",
                  "%H": "HH", "%I": "II", "%p": "pp", "%M": "MM", "%S": "SS"}

    def __init__(self, date_str, format):
        """Parse ``date_str`` with ``format`` and remember both.

        Args:
            date_str: The date text.
            format: The strptime format describing ``date_str``.

        Raises:
            ValueError: If ``date_str`` does not match ``format``.
        """
        self.dateobj = datetime.strptime(date_str, format)
        self.format = format

    def parse_format(self):
        """Return the stored format with directives replaced by placeholders.

        Directives without an entry in ``dateFormat`` and the escaped
        percent sign ``%%`` are left unchanged.

        Returns:
            The readable format string, or ``None`` if no format is stored.
        """
        if self.format is None:
            return None
        # Matching "%%" first stops the second "%" of an escaped percent
        # from being read as the start of a directive.
        return re.sub(
            r"%%|%[A-Za-z]",
            lambda m: DateTime.dateFormat.get(m.group(0), m.group(0)),
            self.format,
        )
nDate = DateTime("12 January, 2018", "%d %B, %Y")
print(nDate.parse_format())
You can wrap the function to store the arguments along with the result any time you call the wrapped version:
from dateutil.parser import parse
from functools import wraps
def parse_wrapper(function):
    """Wrap ``function`` so each call also reports the arguments it received.

    Args:
        function: Callable taking positional arguments.

    Returns:
        A wrapper that returns ``{'datetime': function(*args), 'args': args}``
        and keeps the wrapped function's name and docstring.
    """
    @wraps(function)
    def wrapper(*args):
        return {'datetime': function(*args), 'args': args}
    return wrapper
wrapped_parse = parse_wrapper(parse)
x = wrapped_parse("2014-01-01 00:12:12")
# {'datetime': datetime.datetime(2014, 1, 1, 0, 12, 12),
# 'args': ('2014-01-01 00:12:12',)}

Categories