python convert string time to sql datetime format - python

I am importing a large csv file to ETL into a database, and the date format that was originally set in the csv file looks like 4/22/2016 1:00:00 PM. Each date is part of a larger list that might have non-date type items in it. for example:
v = ['4/29/2016 8:25:58 AM', '5/25/2016 2:22:22 PM', 'True', 'Foo', 1]
I would like to reformat every date (if present in the list) with the correct MySQL format of
%m-%d-%Y %I:%M:%S
How would I do this with a list comprehension? My code is not working for obvious reasons, but I'm not sure where to go from here. I need to retain the index at which each date is found in v.
from datetime import datetime, date, time
v = ['4/29/2016 8:25:58 AM', '5/25/2016 2:22:22 PM', 'True', 'Foo', 1]
def fixdate(_params):
tstamp = datetime.strptime(_params, "%m/%d/%Y %I:%M:%S %p")
newtstamp = date.strftime(tstamp, "%m-%d-%Y %I:%M:%S")
replace = { _params: newtstamp }
l = [replace.get(x, x) for x in _params]
print l
fixdate(v)

Please check this. Comments inline with code.
from datetime import datetime
v = ['4/29/2016 8:25:58 AM', '5/25/2016 2:22:22 PM', 'True', 'Foo', 1]
def fixdate(_params):
    """Reformat the first two items of ``_params`` in place and print the list.

    The items at index 0 and 1 are parsed as ``%m/%d/%Y %I:%M:%S %p``
    timestamps and overwritten with their ``%m-%d-%Y %I:%M:%S`` rendering.
    All other items are left untouched. The list is printed before and after
    the change.

    Raises:
        ValueError: if one of the first two items is not in the expected
            input format.
        IndexError: if ``_params`` holds fewer than two items.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3
    print("Before changing format ...")
    print(_params)
    # First and second date in list
    for index in (0, 1):
        tstamp = datetime.strptime(_params[index], "%m/%d/%Y %I:%M:%S %p")
        # Add %p after %S if AM or PM is required
        # Update the element in list
        _params[index] = tstamp.strftime("%m-%d-%Y %I:%M:%S")
    print("After changing format...")
    print(_params)
fixdate(v)
Output:
C:\Users\dinesh_pundkar\Desktop>python c.py
Before changing format ...
['4/29/2016 8:25:58 AM', '5/25/2016 2:22:22 PM', 'True', 'Foo', 1]
After changing format...
['04-29-2016 08:25:58', '05-25-2016 02:22:22', 'True', 'Foo', 1]
C:\Users\dinesh_pundkar\Desktop>
Code with list comprehension:
from datetime import datetime
v = ['4/29/2016 8:25:58 AM', '5/25/2016 2:22:22 PM', 'True', 'Foo', 1]
def fixdate(_params):
    """Print ``_params`` before and after reformatting every timestamp in it.

    Each item that parses as ``%m/%d/%Y %I:%M:%S %p`` is replaced, at the same
    index, by its ``%m-%d-%Y %I:%M:%S`` rendering (the output format has no
    ``%p``, so the AM/PM marker is dropped). Every other item, including
    non-strings and strings that merely contain ``:`` or ``/``, is kept as is.

    Returns:
        The new list. The list passed in by the caller is not modified.
    """
    def _reformat(item):
        # Let strptime decide what is a date: TypeError covers non-string
        # items such as the int 1, ValueError covers non-matching strings.
        try:
            tstamp = datetime.strptime(item, "%m/%d/%Y %I:%M:%S %p")
        except (TypeError, ValueError):
            return item
        return tstamp.strftime("%m-%d-%Y %I:%M:%S")

    # print(...) with a single argument behaves the same on Python 2 and 3
    print("Before changing format ...")
    print(_params)
    _params = [_reformat(i) for i in _params]
    print("After changing format...")
    print(_params)
    return _params
fixdate(v)
Output:
C:\Users\dinesh_pundkar\Desktop>python c.py
Before changing format ...
['4/29/2016 8:25:58 AM', '5/25/2016 2:22:22 PM', 'True', 'Foo', 1]
After changing format...
['04-29-2016 08:25:58', '05-25-2016 02:22:22', 'True', 'Foo', 1]
C:\Users\dinesh_pundkar\Desktop>

Related

How to validate if a date indicated as a string belongs to an interval of 2 dates indicated in another string?

import os, datetime
content = os.listdir("when")
print(content)
#this print...
#['2022_-_12_-_29 12pp33 am _--_ 2023_-_01_-_25 19pp13 pm.txt', '2023-02-05 00pp00 am.txt']
for i in range(len(content)):
content[i] = content[i].replace("_-_", "-").replace("pp", ":")
print(content) #I prepare the input to use it to search
#this print...
#['2022-12-29 12:33 am _--_ 2023-01-25 19:13 pm.txt', '2023-02-05 00:00 am.txt']
input_to_search_in_folder = "2022_-_01_-_05 12:33 am" #file data to find in the 'when' folder
I have changed the : to pp (referring to point-point) because you cannot place : in folders or/and files, at least not in Windows
2022_-_12_-_29 12pp33 am _--_ 2023_-_01_-_25 19pp13 pm
initial date _--_ final date
In this case input_to_search_in_folder = "2022_-_01_-_05 12:33 am" does not match a file with a specific date name. But if it belongs to the interval of days indicated in the file name '2022_-_12_-_29 12pp33 am _--_ 2023_-_01_-_25 19pp13 pm.txt'
How could I validate that this date "2022_-_01_-_05 12:33 am" does belong to that time interval '2022_-_12_-_29 12pp33 am _--_ 2023_-_01_-_25 19pp13 pm' or if it's this date '2023-02-05 00:00 am'?
If the validation is successful, the program should print the content inside that .txt (in this case inside the 2022_-_12_-_29 12pp33 am _--_ 2023_-_01_-_25 19pp13 pm.txt )
text_file = open("when/" + , "r")
data_inside_this_file = text_file.read()
text_file.close()
#And finally prints the content of the .txt file that matches the date specified in the 'input_to_search_in_folder' variable
print(repr(data_inside_this_file))
I would clean the strings fully, convert them to datetime objects (because these can be compared to each other), then compare them. Once you have the result, you can do whatever you need with it:
import os
from datetime import datetime
content = os.listdir("when")
print(content)
#['2022_-_12_-_29 12pp33 am _--_ 2023_-_01_-_25 19pp13 pm.txt', '2023-02-05 00pp00 am.txt']
# Undo the file-name escaping: "_-_" stands for "-" and "pp" stands for ":"
content = [name.replace("_-_", "-").replace("pp", ":") for name in content]
#['2022-12-29 12:33 am _--_ 2023-01-25 19:13 pm.txt', '2023-02-05 00:00 am.txt']


def _parse_stamp(text):
    """Parse 'YYYY-MM-DD HH:MM am|pm', ignoring the trailing am/pm marker.

    The hours in the file names are on a 24-hour clock (e.g. '19:13 pm'), so
    the marker carries no information; left in place it makes strptime fail
    with "unconverted data remains".
    """
    date_part, time_part = text.split()[:2]
    return datetime.strptime(date_part + " " + time_part, "%Y-%m-%d %H:%M")


# Drop the ".txt" extension before splitting, otherwise it stays glued to
# the end date.
cleaned_filename = os.path.splitext(content[0])[0] #="2022-12-29 12:33 am _--_ 2023-01-25 19:13 pm"
start_text, last_text = cleaned_filename.split(" _--_ ")
start_dt = _parse_stamp(start_text)
#="2022-12-29 12:33 am" = datetime(2022, 12, 29, 12, 33)
last_dt = _parse_stamp(last_text)
#="2023-01-25 19:13 pm"
third_dt = _parse_stamp(os.path.splitext(content[1])[0])
#="2023-02-05 00:00 am"
input_to_search = "2022_-_01_-_05 12:33 am".replace("_-_", "-")
#"2022-01-05 12:33 am".
input_dt = _parse_stamp(input_to_search)
#="datetime(2022, 1, 5, 12, 33)"
if start_dt <= input_dt <= last_dt:
    print("in between")
elif input_dt == third_dt:
    print("Match")
else:
    print("No!")
A way is to extract the dates using regex and then convert them as date like mrblue6's answer:
#!/usr/bin/python3
from datetime import datetime
import re
# Let's assume this is one of the directory entries
direntry = '2022_-_12_-_29 12pp33 am _--_ 2023_-_01_-_25 19pp13 pm.txt'
# We exclude in the regex the AM/PM part since the format is 24-hour clock.
# Raw strings keep the "\d" escapes intact instead of relying on Python
# passing unknown string escapes through unchanged.
datePattern = r'(\d{4}_-_\d{2}_-_\d{2} \d{2}pp\d{2}) [ap]m'
# The dot is escaped so that only a literal ".txt" suffix matches
dirPattern = rf'{datePattern} _--_ {datePattern}\.txt'
# Let's extract the "milestone" dates
matches = re.search(dirPattern, direntry)
if matches is None:
    raise ValueError(f"not an interval file name: {direntry!r}")
extractedDate1, extractedDate2 = matches.groups()
# Let's extract the date to check
matches = re.search(datePattern, "2022_-_01_-_05 12pp33 am")
if matches is None:
    raise ValueError("the date to check does not match the expected pattern")
extractedDateToCheck = matches.group(1)
# Let's convert them as date time
readDateFormat = '%Y_-_%m_-_%d %Hpp%M'
date1 = datetime.strptime(extractedDate1, readDateFormat)
date2 = datetime.strptime(extractedDate2, readDateFormat)
dateToCheck = datetime.strptime(extractedDateToCheck, readDateFormat)
# Let's compare them
print(f"Date 1 : {date1}")
print(f"Date 2 : {date2}")
print(f"Date to check: {dateToCheck}")
print(f"Check: {date1 <= dateToCheck <= date2}")
Output:
Date 1 : 2022-12-29 12:33:00
Date 2 : 2023-01-25 19:13:00
Date to check: 2022-01-05 12:33:00
Check: False

time data '06/04/2020, 12:36 pm - ' does not match format '%d/%m/%y, %H:%M %p - ' (match)

df['message_date'] = pd.to_datetime(df['message_date'], format = '%d/%m/%y, %H:%M %p - ')
The error is:
ValueError: time data '06/04/2020, 12:36 pm - ' does not match format '%d/%m/%y, %H:%M %p - ' (match)
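Change the lowercase %y (two-digit year) to an uppercase %Y (four-digit year), because the year in the data is written as 2020: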
# %Y: four-digit year. %I instead of %H: strptime only applies the am/pm
# marker (%p) to the hour when the hour is parsed with %I.
# The trailing space after "-" mirrors the sample value '06/04/2020, 12:36 pm - '.
# NOTE(review): assumes every row uses a 12-hour clock like the sample - confirm.
df['message_date'] = pd.to_datetime(df['message_date'], format='%d/%m/%Y, %I:%M %p - ')

Set file name with auto-number

Python beginner here: I wrote a script which creates a file every 3 minutes, I used strftime to set the name according to date/time it was created:
dt = datetime.now()
dtTemp = dt.strftime('%d-%b-%Y - %H-%M-%S')
filename = '/home/pi/baby_lapse/%s.jpg' % dtTemp
Here's an example of the output:
18-Jan-2019 - 23-21-03.jpg
The problem is that once I have more than one month of files, it creates a problem to sort the files by file name, which is important to me.
To resolve it, I thought to add some auto-number before the strftime string so it will produce an output such as:
000 - 18-Jan-2019 - 23-21-03.jpg
001 - 18-Jan-2019 - 23-24-03.jpg
002 - 18-Jan-2019 - 23-27-03.jpg
How can it be achieved?
If you just prepend the milliseconds since epoch at the beginning, it will always sort by date. For ease of reading, you can leave the human-readable date string there.
To get milliseconds since epoch just use time.time() * 1000.
I decided to follow @chepner's suggestion and used 2019-01-18 as the date format.
After setting the date format for future records, I had to run a data fix and fix the naming of the existing records.
I ended up writing my own script that converts file names from this 18-Jan-2019 - 23-21-03.jpg format to 2019-01-18 - 23-21-03.jpg, I'm sharing it in case someone has a similar scenario:
import os
# Three-letter month abbreviation -> two-digit month number
Months = {
    "Jan": "01",
    "Feb": "02",
    "Mar": "03",
    "Apr": "04",
    "May": "05",
    "Jun": "06",
    "Jul": "07",
    "Aug": "08",
    "Sep": "09",
    "Oct": "10",
    "Nov": "11",
    "Dec": "12",
}


def new_file_name(filename):
    """Return the ISO-dated name for an old-style file name, else None.

    An old-style name looks like ``18-Jan-2019 - 23-21-03.jpg`` and maps to
    ``2019-01-18 - 23-21-03.jpg``. Any name that does not have exactly that
    shape (unrelated files, names that were already converted) yields None,
    so the caller can skip it instead of crashing or mangling it.
    """
    pieces = filename.split(' ')  # example: 18-Jan-2019 - 23-21-03.jpg
    if len(pieces) != 3 or pieces[1] != '-':
        return None
    datesplit = pieces[0].split('-')  # '18', 'Jan', '2019'
    if len(datesplit) != 3 or datesplit[1] not in Months:
        return None
    dayOfMonth, month, year = datesplit
    return year + '-' + Months[month] + '-' + dayOfMonth + ' - ' + pieces[2]


for filename in os.listdir("."):
    newFileName = new_file_name(filename)
    if newFileName is None:
        # Not an old-style name: leave the file alone
        continue
    print(newFileName)  # 2019-01-18 - 23-21-03.jpg
    os.rename(filename, newFileName)
I think the better idea would be to change the date formatting into numeric format such as %Y-%m-%d %H:%M:%S. By doing so, it would be easy to sort file by name. For example,
/home/pi/baby_lapse/2019-01-26 20:51:42.jpg
/home/pi/baby_lapse/2019-01-26 20:51:43.jpg
and so on ...
Just tried a sample script considering your case, I got a more readable result
Code
for _ in range(10):
dt = datetime.now()
dtTemp = dt.strftime('%Y-%m-%d %H:%M:%S')
filename = '/home/pi/baby_lapse/%s.jpg' % dtTemp
print(filename)
time.sleep(0.5)
Result
/home/pi/baby_lapse/2019-01-26 20:51:42.jpg
/home/pi/baby_lapse/2019-01-26 20:51:43.jpg
/home/pi/baby_lapse/2019-01-26 20:51:43.jpg
/home/pi/baby_lapse/2019-01-26 20:51:44.jpg
/home/pi/baby_lapse/2019-01-26 20:51:44.jpg
/home/pi/baby_lapse/2019-01-26 20:51:45.jpg
/home/pi/baby_lapse/2019-01-26 20:51:45.jpg
/home/pi/baby_lapse/2019-01-26 20:51:46.jpg
/home/pi/baby_lapse/2019-01-26 20:51:46.jpg
/home/pi/baby_lapse/2019-01-26 20:51:47.jpg

Get the format in dateutil.parse

Is there a way to get the "format" after parsing a date in dateutil. For example something like:
>>> x = parse("2014-01-01 00:12:12")
datetime.datetime(2014, 1, 1, 0, 12, 12)
x.get_original_string_format()
YYYY-MM-DD HH:MM:SS # %Y-%m-%d %H:%M:%S
# Or, passing the date-string directly
get_original_string_format("2014-01-01 00:12:12")
YYYY-MM-DD HH:MM:SS # %Y-%m-%d %H:%M:%S
Update: I'd like to add a bounty to this question to see if someone could add an answer that would do the equivalent on getting the string-format of a common date-string passed. It can use dateutil if you want, but it doesn't have to. Hopefully we'll get some creative solutions here.
Is there a way to get the "format" after parsing a date in dateutil?
Not possible with dateutil. The problem is that dateutil never has the format as an intermediate result any time during the parsing as it detects separate components of the datetime separately - take a look at this not quite easy to read source code.
I don't know of a way that you can return the parsed format from dateutil (or any other python timestamp parser that I know of).
Implementing your own timestamp parsing function that returns a list of possible formats and related datetime objects is fairly trivial using datetime.strptime() but doing it efficiently against a broadly useful list of possible timestamp formats is not.
The following example utilizes a list of just over 100 formats. It does not even scratch the surface of the wide variety of formats parsed by dateutil. It tests each format in sequence until it exhausts all formats in the list (likely much less efficient than the dateutil approach of locating the various datetime parts independently as noted in the answer from @alecxe).
In addition, I have included some example timestamp formats that include time zone names (instead of offsets). If you run the example function below against those particular datetime strings, you may find that it does not return the expected matches even though I have included matching formats using the %Z directive. Some explanation for the challenges with using %Z to handle time zone names can be found in issue 22377 at bugs.python.org (just to highlight another non-trivial aspect of implementing your own datetime parsing function).
With all of those caveats, if you are dealing with a manageable set of potential formats, implementing something simple like the below may get you what you need.
Example function that attempts to match a datetime string against a list of formats and return a dict that includes the original datestring and a list of matches, each a dict that includes a datetime object along with the matched format:
from datetime import datetime
def parse_timestamp(datestring, formats):
    """Try every format in ``formats`` against ``datestring``.

    Args:
        datestring: the timestamp text to parse.
        formats: iterable of ``strptime`` format strings, tried in order.

    Returns:
        A dict ``{'datestring': datestring, 'matches': [...]}`` where each
        match is ``{'datetime': <parsed datetime>, 'format': <format>}``.
        ``matches`` is empty when no format fits.

    Raises:
        TypeError: if ``datestring`` or a format is not a string.
    """
    # Local import: only needed to name the regex error caught below
    import re

    matches = []
    for f in formats:
        try:
            d = datetime.strptime(datestring, f)
        except (ValueError, re.error):
            # ValueError: the text does not fit this format (or the format
            # has a bad directive). re.error: strptime could not compile the
            # format, e.g. because a directive is repeated. Either way this
            # format is simply not a match. Anything else is a real error
            # and must not be swallowed.
            continue
        matches.append({'datetime': d, 'format': f})
    return {'datestring': datestring, 'matches': matches}
Example formats and datetime strings:
formats = ['%A, %B %d, %Y', '%A, %B %d, %Y %I:%M:%S %p %Z', '%A, %d %B %Y', '%B %d %Y', '%B %d, %Y', '%H:%M:%S', '%H:%M:%S,%f', '%H:%M:%S.%f', '%Y %b %d %H:%M:%S.%f', '%Y %b %d %H:%M:%S.%f %Z', '%Y %b %d %H:%M:%S.%f*%Z', '%Y%m%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S %z', '%Y-%m-%d %H:%M:%S%z', '%Y-%m-%d %H:%M:%S,%f', '%Y-%m-%d %H:%M:%S,%f%z', '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S.%f%z', '%Y-%m-%d %I:%M %p', '%Y-%m-%d %I:%M:%S %p', '%Y-%m-%d*%H:%M:%S', '%Y-%m-%d*%H:%M:%S:%f', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S%Z', '%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S*%f%z', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S.%f%z', '%Y/%m/%d', '%Y/%m/%d*%H:%M:%S', '%a %b %d %H:%M:%S %Z %Y', '%a, %d %b %Y %H:%M:%S %z', '%b %d %H:%M:%S', '%b %d %H:%M:%S %Y', '%b %d %H:%M:%S %z', '%b %d %H:%M:%S %z %Y', '%b %d %Y', '%b %d %Y %H:%M:%S', '%b %d, %Y', '%b %d, %Y %I:%M:%S %p', '%b.%d.%Y', '%d %B %Y', '%d %B %Y %H:%M:%S %Z', '%d %b %Y %H:%M:%S', '%d %b %Y %H:%M:%S %z', '%d %b %Y %H:%M:%S*%f', '%d%m_%H:%M:%S', '%d%m_%H:%M:%S.%f', '%d-%b-%Y', '%d-%b-%Y %H:%M:%S', '%d-%b-%Y %H:%M:%S.%f', '%d-%b-%Y %I:%M:%S %p', '%d-%m-%Y', '%d-%m-%Y %I:%M %p', '%d-%m-%Y %I:%M:%S %p', '%d-%m-%y', '%d-%m-%y %I:%M %p', '%d-%m-%y %I:%M:%S %p', '%d/%b %H:%M:%S,%f', '%d/%b/%Y %H:%M:%S', '%d/%b/%Y %I:%M %p', '%d/%b/%Y:%H:%M:%S', '%d/%b/%Y:%H:%M:%S %z', '%d/%m/%Y', '%d/%m/%Y %H:%M:%S %z', '%d/%m/%Y %I:%M %p', '%d/%m/%Y %I:%M:%S %p', '%d/%m/%Y %I:%M:%S %p:%f', '%d/%m/%Y*%H:%M:%S', '%d/%m/%Y*%H:%M:%S*%f', '%d/%m/%y', '%d/%m/%y %H:%M:%S', '%d/%m/%y %H:%M:%S %z', '%d/%m/%y %I:%M %p', '%d/%m/%y %I:%M:%S %p', '%d/%m/%y*%H:%M:%S', '%m%d_%H:%M:%S', '%m%d_%H:%M:%S.%f', '%m-%d-%Y', '%m-%d-%Y %I:%M %p', '%m-%d-%Y %I:%M:%S %p', '%m-%d-%y', '%m-%d-%y %I:%M %p', '%m-%d-%y %I:%M:%S %p', '%m/%d/%Y', '%m/%d/%Y %H:%M:%S %z', '%m/%d/%Y %I:%M %p', '%m/%d/%Y %I:%M:%S %p', '%m/%d/%Y %I:%M:%S %p:%f', '%m/%d/%Y*%H:%M:%S', '%m/%d/%Y*%H:%M:%S*%f', '%m/%d/%y', '%m/%d/%y %H:%M:%S', '%m/%d/%y %H:%M:%S %z', '%m/%d/%y %I:%M %p', '%m/%d/%y %I:%M:%S 
%p', '%m/%d/%y*%H:%M:%S', '%y%m%d %H:%M:%S', '%y-%m-%d %H:%M:%S', '%y-%m-%d %H:%M:%S,%f', '%y-%m-%d %H:%M:%S,%f %z', '%y/%m/%d %H:%M:%S']
datestrings = ['03-11-1999', '03-12-1999 5:06 AM', '03-12-1999 5:06:07 AM', '03-12-99 5:06 AM', '03-12-99 5:06:07 AM', '03/12/1999', '03/12/1999 5:06 AM', '03/12/1999 5:06:07 AM', '03/12/99 5:06 AM', '03/12/99 5:06:07', '03/12/99 5:06:07 AM', '04/23/17 04:34:22 +0000', '0423_11:42:35', '0423_11:42:35.883', '05/09/2017*08:22:14*612', '06/01/22 04:11:05', '08/10/11*13:33:56', '10-04-19 12:00:17', '10-06-26 02:31:29,573', '10/03/2017 07:29:46 -0700', '11-02-11 16:47:35,985 +0000', '11/22/2017*05:13:11', '11:42:35', '11:42:35,173', '11:42:35.173', '12/03/1999', '12/03/1999 5:06 AM', '12/03/99 5:06 AM', '12/3/1999', '12/3/1999 5:06 AM', '12/3/1999 5:06:07 AM', '150423 11:42:35', '19/Apr/2017:06:36:15 -0700', '1999-03-12 05:06:07.0', '1999-03-12 5:06 AM', '1999-03-12 5:06:07 AM', '1999-03-12+01:00', '1999-3-12 5:06 AM', '1999-3-12 5:06:07 AM', '1999/3/12', '20150423 11:42:35.173', '2017 Mar 03 05:12:41.211 PDT', '2017 Mar 10 01:44:20.392', '2017-02-11T18:31:44', '2017-03-10 14:30:12,655+0000', '2017-03-12 13:11:34.222-0700', '2017-03-12T17:56:22-0700', '2017-06-26 02:31:29,573', '2017-07-01T14:59:55.711+0000', '2017-07-04*13:23:55', '2017-07-22T16:28:55.444', '2017-08-19 12:17:55 -0400', '2017-08-19 12:17:55-0400', '2017-09-08T03:13:10', '2017-10-14T22:11:20+0000', '2017-10-30*02:47:33:899', '2017-11-22T10:10:15.455', '2017/04/12*19:37:50', '2018 Apr 13 22:08:13.211*PDT', '2018-02-27 15:35:20.311', '2018-08-20T13:20:10*633+0000', '22 Mar 1999 05:06:07 +0100', '22 March 1999', '22 March 1999 05:06:07 CET', '22-Mar-1999', '22-Mar-1999 05:06:07', '22-Mar-1999 5:06:07 AM', '22/03/1999 5:06:07 AM', '22/Mar/1999 5:06:07 +0100', '22/Mar/99 5:06 AM', '23 Apr 2017 10:32:35*311', '23 Apr 2017 11:42:35', '23-Apr-2017 11:42:35', '23-Apr-2017 11:42:35.883', '23/Apr 11:42:35,173', '23/Apr/2017 11:42:35', '23/Apr/2017:11:42:35', '3-11-1999', '3-12-1999 5:06 AM', '3-12-99 5:06 AM', '3-12-99 5:06:07 AM', '3-22-1999 5:06:07 AM', '3/12/1999', '3/12/1999 5:06 AM', '3/12/1999 5:06:07 AM', 
'3/12/99 5:06 AM', '3/12/99 5:06:07', '8/5/2011 3:31:18 AM:234', '9/28/2011 2:23:15 PM', 'Apr 20 00:00:35 2010', 'Dec 2, 2017 2:39:58 AM', 'Jan 21 18:20:11 +0000 2017', 'Jun 09 2018 15:28:14', 'Mar 16 08:12:04', 'Mar 22 1999', 'Mar 22, 1999', 'Mar 22, 1999 5:06:07 AM', 'Mar.22.1999', 'March 22 1999', 'March 22, 1999', 'Mon Mar 22 05:06:07 CET 1999', 'Mon, 22 Mar 1999 05:06:07 +0100', 'Monday, 22 March 1999', 'Monday, March 22, 1999', 'Monday, March 22, 1999 5:06:07 AM CET', 'Sep 28 19:00:00 +0000']
Example usage:
print(parse_timestamp('2018-08-20T13:20:10*633+0000', formats))
# OUTPUT
# {'datestring': '2018-08-20T13:20:10*633+0000', 'matches': [{'datetime': datetime.datetime(2018, 8, 20, 13, 20, 10, 633000, tzinfo=datetime.timezone.utc), 'format': '%Y-%m-%dT%H:%M:%S*%f%z'}]}
My idea was to:
Create an object that has a list of candidate specifiers you think might be in the date pattern (the more you add, the more possibilities you will get out the other end)
Parse the date string
Create a list of possible specifiers for each element in the string, based on the date and the list of candidates you supplied.
Recombine them to produce a list of 'possibles'.
If you get only a single candidate, you can be pretty sure it is the right format. But you will often get many possibilities (especially with dates, months, minutes and hours all in the 0-10 range).
Example class:
import re
from itertools import product
from dateutil.parser import parse
from collections import defaultdict, Counter
COMMON_SPECIFIERS = [
'%a', '%A', '%d', '%b', '%B', '%m',
'%Y', '%H', '%p', '%M', '%S', '%Z',
]
class FormatFinder:
    """Guess which strftime patterns could have produced a date string."""

    def __init__(self,
                 valid_specifiers=COMMON_SPECIFIERS,
                 date_element=r'([\w]+)',
                 delimiter_element=r'([\W]+)',
                 ignore_case=False):
        """Store the candidate specifiers and compile the tokenising regex.

        Args:
            valid_specifiers: strftime specifiers considered as candidates.
            date_element: regex (one capture group) for a date element.
            delimiter_element: regex (one capture group) for a delimiter.
            ignore_case: when true, upper- and lower-cased renderings of each
                specifier are accepted as well.
        """
        self.ignore_case = ignore_case
        self.specifiers = valid_specifiers
        # One alternation: either a date element or a delimiter per match
        self.pattern = re.compile(r'' + date_element + r"|" + delimiter_element)

    def find_candidate_patterns(self, date_string):
        """Yield every candidate format string for ``date_string``.

        The string is parsed with ``dateutil``; every specifier is rendered
        for the parsed date, and each date element of the input is replaced
        by the specifiers whose rendering equals it. Elements matching no
        specifier, and all delimiters, are kept verbatim. Combinations that
        use the same specifier more than once are skipped.
        """
        parsed = parse(date_string)

        # rendered text -> specifiers that produce that text for this date
        by_rendered_text = defaultdict(list)
        for spec in self.specifiers:
            rendered = parsed.strftime(spec)
            by_rendered_text[rendered].append(spec)
            if self.ignore_case:
                # The case variants share the very same list object
                shared = by_rendered_text[rendered]
                by_rendered_text[rendered.upper()] = shared
                by_rendered_text[rendered.lower()] = shared

        choices = []
        for word, separator in self.pattern.findall(date_string):
            if not word:
                choices.append([separator])
            elif word in by_rendered_text:
                choices.append(by_rendered_text[word])
            else:
                choices.append([word])  # just use this verbatim?

        for combo in product(*choices):
            usage = Counter(combo)
            if max(usage[spec] for spec in self.specifiers) > 1:
                # this is a candidate with the same item used more than once
                continue
            yield "".join(combo)
And some sample tests:
def test_it_returns_value_from_question_1():
s = "2014-01-01 00:12:12"
candidates = FormatFinder().find_candidate_patterns(s)
sut = FormatFinder()
candidates = sut.find_candidate_patterns(s)
assert "%Y-%m-%d %H:%M:%S" in candidates
def test_it_returns_value_from_question_2():
s = 'Jan. 04, 2017'
sut = FormatFinder()
candidates = sut.find_candidate_patterns(s)
candidates = list(candidates)
assert "%b. %d, %Y" in candidates
assert len(candidates) == 1
def test_it_can_ignore_case():
# NB: apparently the 'AM/PM' is meant to be capitalised in my locale!
# News to me!
s = "JANUARY 12, 2018 02:12 am"
sut = FormatFinder(ignore_case=True)
candidates = sut.find_candidate_patterns(s)
assert "%B %d, %Y %H:%M %p" in candidates
def test_it_returns_parts_that_have_no_date_component_verbatim():
# In this string, the 'at' is considered as a 'date' element,
# but there is no specifier that produces a candidate for it
s = "January 12, 2018 at 02:12 AM"
sut = FormatFinder()
candidates = sut.find_candidate_patterns(s)
assert "%B %d, %Y at %H:%M %p" in candidates
To make it a bit clearer, here's some example of using this code in an iPython shell:
In [2]: ff = FormatFinder()
In [3]: list(ff.find_candidate_patterns("2014-01-01 00:12:12"))
Out[3]:
['%Y-%d-%m %H:%M:%S',
'%Y-%d-%m %H:%S:%M',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%S:%M']
In [4]: list(ff.find_candidate_patterns("Jan. 04, 2017"))
Out[4]: ['%b. %d, %Y']
In [5]: list(ff.find_candidate_patterns("January 12, 2018 at 02:12 AM"))
Out[5]: ['%B %d, %Y at %H:%M %p', '%B %M, %Y at %H:%d %p']
In [6]: ff_without_case = FormatFinder(ignore_case=True)
In [7]: list(ff_without_case.find_candidate_patterns("JANUARY 12, 2018 02:12 am"))
Out[7]: ['%B %d, %Y %H:%M %p', '%B %M, %Y %H:%d %p']
Idea:
Inspect the user input date string, and build possible date format set
Loop over the format set, use datetime.strptime parse the date string with individual possible date format.
Format the date from step 2 with datetime.strftime, if the result equal to the origin date string, then this format is a possible date format.
Algorithm implementation
from datetime import datetime
import itertools
import re
FORMAT_CODES = (
r'%a', r'%A', r'%w', r'%d', r'%b', r'%B', r'%m', r'%y', r'%Y',
r'%H', r'%I', r'%p', r'%M', r'%S', r'%f', r'%z', r'%Z', r'%j',
r'%U', r'%W',
)
TWO_LETTERS_FORMATS = (
r'%p',
)
THREE_LETTERS_FORMATS = (
r'%a', r'%b'
)
LONG_LETTERS_FORMATS = (
r'%A', r'%B', r'%z', r'%Z',
)
# Directives that render as a single digit. The leading "%" is required:
# without it the entry is a literal letter, not a format code.
SINGLE_DIGITS_FORMATS = (
    r'%w',
)
TWO_DIGITS_FORMATS = (
r'%d', r'%m', r'%y', r'%H', r'%I', r'%M', r'%S', r'%U', r'%W',
)
THREE_DIGITS_FORMATS = (
r'%j',
)
FOUR_DIGITS_FORMATS = (
r'%Y',
)
LONG_DIGITS_FORMATS = (
r'%f',
)
# Non format code symbols
SYMBOLS = (
'-',
':',
'+',
'Z',
',',
' ',
)
if __name__ == '__main__':
    date_str = input('Please input a date: ')
    # Split with non format code symbols
    pattern = r'[^{}]+'.format(''.join(SYMBOLS))
    # Create a format placeholder, eg. '{}-{}-{} {}:{}:{}+{}'
    placeholder = re.sub(pattern, '{}', date_str)

    # Shape of a component -> format codes that can render that shape.
    # Rows are tried top to bottom; the first matching row wins.
    shape_table = (
        (r'^\d{1}$', SINGLE_DIGITS_FORMATS),
        (r'^\d{2}$', TWO_DIGITS_FORMATS),
        (r'^\d{3}$', THREE_DIGITS_FORMATS),
        (r'^\d{4}$', FOUR_DIGITS_FORMATS),
        (r'^\d{5,}$', LONG_DIGITS_FORMATS),
        (r'^[a-zA-Z]{2}$', TWO_LETTERS_FORMATS),
        (r'^[a-zA-Z]{3}$', THREE_LETTERS_FORMATS),
        (r'^[a-zA-Z]{4,}$', LONG_LETTERS_FORMATS),
    )
    formats = []
    for comp in re.findall(pattern, date_str):
        for shape, codes in shape_table:
            if re.match(shape, comp):
                formats.append(codes)
                break
        else:
            # Unrecognised shape: any format code is a candidate
            formats.append(FORMAT_CODES)

    found = 0
    # Walk over every combination of the per-component candidates
    for combination in itertools.product(*formats):
        # Create a format with possible format combination
        dt_format = placeholder.format(*combination)
        try:
            # Use the format to parse the date, and format the
            # date back to string and compare with the origin one
            round_trip = datetime.strptime(date_str, dt_format).strftime(dt_format)
            if round_trip == date_str:
                print('Possible result: {}'.format(dt_format))
                found += 1
        except Exception:
            continue
    if found == 0:
        print('No pattern found')
Usage:
$ python3 reverse.py
Please input a date: 2018-12-31 10:26 PM
Possible result: %Y-%d-%M %I:%S %p
Possible result: %Y-%d-%S %I:%M %p
Possible result: %Y-%m-%d %I:%M %p
Possible result: %Y-%m-%d %I:%S %p
Possible result: %Y-%m-%M %I:%d %p
Possible result: %Y-%m-%M %I:%S %p
Possible result: %Y-%m-%S %I:%d %p
Possible result: %Y-%m-%S %I:%M %p
Possible result: %Y-%H-%d %m:%M %p
Possible result: %Y-%H-%d %m:%S %p
Possible result: %Y-%H-%d %M:%S %p
Possible result: %Y-%H-%d %S:%M %p
Possible result: %Y-%H-%M %d:%S %p
Possible result: %Y-%H-%M %m:%d %p
Possible result: %Y-%H-%M %m:%S %p
Possible result: %Y-%H-%M %S:%d %p
Possible result: %Y-%H-%S %d:%M %p
Possible result: %Y-%H-%S %m:%d %p
Possible result: %Y-%H-%S %m:%M %p
Possible result: %Y-%H-%S %M:%d %p
Possible result: %Y-%I-%d %m:%M %p
Possible result: %Y-%I-%d %m:%S %p
Possible result: %Y-%I-%d %M:%S %p
Possible result: %Y-%I-%d %S:%M %p
Possible result: %Y-%I-%M %d:%S %p
Possible result: %Y-%I-%M %m:%d %p
Possible result: %Y-%I-%M %m:%S %p
Possible result: %Y-%I-%M %S:%d %p
Possible result: %Y-%I-%S %d:%M %p
Possible result: %Y-%I-%S %m:%d %p
Possible result: %Y-%I-%S %m:%M %p
Possible result: %Y-%I-%S %M:%d %p
Possible result: %Y-%M-%d %I:%S %p
Possible result: %Y-%M-%S %I:%d %p
Possible result: %Y-%S-%d %I:%M %p
Possible result: %Y-%S-%M %I:%d %p
My idea was to create a class something like this, might not be accurate
from datetime import datetime
import re
class DateTime(object):
    """A parsed date kept together with the format string that parsed it."""

    # strptime directive -> human-readable placeholder.
    # Will contain all format equivalents; directives missing from this
    # table are left as they are by parse_format().
    dateFormat = {"%d": "dd", "%Y": "YYYY", "%a": "Day", "%A": "DAY", "%w": "ww", "%b": "Mon", "%B": "MON", "%m": "mm",
                  "%H": "HH", "%I": "II", "%p": "pp", "%M": "MM", "%S": "SS"}

    def __init__(self, date_str, format):
        """Parse ``date_str`` with ``format`` and remember both.

        Raises:
            ValueError: if ``date_str`` does not match ``format``.
        """
        self.dateobj = datetime.strptime(date_str, format)
        self.format = format

    def parse_format(self):
        """Return the stored format with known directives spelled out.

        Every directive found in ``dateFormat`` is replaced by its
        placeholder (``%d`` -> ``dd``). Unknown directives and the literal
        ``%%`` are kept unchanged. Returns None when no format is stored.
        """
        if self.format is None:
            return None
        table = DateTime.dateFormat

        def _spell_out(match):
            directive = match.group(0)
            return table.get(directive, directive)

        # "%%" is matched as one unit so that the letter following it is not
        # mistaken for a directive; a "%" followed by a space is not matched.
        return re.sub(r"%[%A-Za-z]", _spell_out, self.format)
nDate = DateTime("12 January, 2018", "%d %B, %Y")
print(nDate.parse_format())
You can wrap the function to store the arguments along with the result any time you call the wrapped version:
from dateutil.parser import parse
from functools import wraps
def parse_wrapper(function):
    """Wrap ``function`` so each call also reports the arguments it was given.

    Returns:
        A function taking the same positional arguments as ``function`` and
        returning ``{'datetime': function(*args), 'args': args}``.
    """
    # wraps() copies __name__, __doc__ etc. from the wrapped function
    @wraps(function)
    def wrapper(*args):
        return {'datetime': function(*args), 'args': args}
    return wrapper
wrapped_parse = parse_wrapper(parse)
x = wrapped_parse("2014-01-01 00:12:12")
# {'datetime': datetime.datetime(2014, 1, 1, 0, 12, 12),
# 'args': ('2014-01-01 00:12:12',)}

Repeated values in Django Query on aggregate and SUM

I am using PyGal to render some chart on the frontend. My django-view [Function Based] somewhat looks like this :
def random_view(request):
values_list = list()
camera_dict = dict()
bar_chart = pygal.Bar(spacing=60, explicit_size=True, width=2000,
height=800, pretty_print=True, margin=5, x_label_rotation=60, show_minor_x_labels=True)
bar_chart.x_labels = ['8 AM', '9 AM', '10 AM', '11 AM', '12 Noon', '13 PM', '14 PM',
'15 PM', '16 PM', '17 PM', '18 PM', '19 PM', '20 PM', '21 PM', '22 PM', '23 PM']
if request.method == 'GET':
profile = Profile.objects.get(user_profile=request.user)
store_qs = Store.objects.filter(brand_admin=profile)
for store in store_qs:
cam_qs = Camera.objects.filter(install_location=store)
for cam in cam_qs:
for x in range(10, 22):
value = PeopleCount.objects.filter(
timestamp__date='2017-09-06', timestamp__hour=x, camera=cam).aggregate(Sum('people_count_entry'))['people_count_entry__sum'] # noqa
values_list.append(value)
bar_chart.add(str(cam), values_list)
context = {'test': camera_dict, 'fun': bar_chart.render_data_uri()}
return render(request, 'reports/report_daily.html', context)
The issue is I am getting same values for two different camera object.
Info:
For instance, if a store has two cameras let's say cam1 and cam2. I am getting same values for both the cam which should not be the case.
I don't know where I am making the mistake. Help Appreciated
Thanks in Advance :)
The problem is that you define values_list outside the "camera" loop. What you are doing is building a list containing the values from all the cameras from all the stores. To build a list for each camera, instantiate values_list inside the "camera" loop.
#...
for cam in cam_qs:
values_list = []
# ...

Categories