for x in y loop capture instance value - python

If I have a string like the following:
output = '''
Certificate 1:
Valid from: Mon Jun 12 14:58:50 EDT 2017
Valid until: Wed Jun 12 15:28:50 EDT 2019
Certificate 2:
Valid from: Mon Jun 12 15:00:43 EDT 2017
Valid until: Wed Jun 12 15:30:43 EDT 2019
'''
I want to differentiate between the two values when I convert to unixtime. How do I tell it when it's Certificate 1 or Certificate 2?
This is what I have so far, works for getting the two dates, but I don't know how to say if it's the first result then it's Certificate 1.
for line in output.splitlines():
if 'Valid until' in line:
environment = '???'
valid_until_time = (line.split(':', 1)[1]).strip()[4:]
valid_until_time = valid_until_time.replace(' EDT', '')
unixtime = time.mktime(datetime.strptime(valid_until_time, '%b %d %H:%M:%S %Y').timetuple())
send_to_zabbixsender(zabbix_executable,
zabbix_config,
item_key='{0}.expirydate'.format(environment),
item_value=unixtime)

Just store the current certificate number as state whenever you encounter a certificate header line:
import time
from datetime import datetime
output = '''
Certificate 1:
Valid from: Mon Jun 12 14:58:50 EDT 2017
Valid until: Wed Jun 12 15:28:50 EDT 2019
Certificate 2:
Valid from: Mon Jun 12 15:00:43 EDT 2017
Valid until: Wed Jun 12 15:30:43 EDT 2019
'''
current_certificate = 0
for line in output.splitlines():
if line.startswith("Certificate"):
current_certificate = int(line.split()[1].rstrip(":"))
if 'Valid until' in line:
environment = '???'
valid_until_time = (line.split(':', 1)[1]).strip()[4:]
valid_until_time = valid_until_time.replace(' EDT', '')
unixtime = time.mktime(datetime.strptime(valid_until_time, '%b %d %H:%M:%S %Y').timetuple())
print("{}: {}".format(current_certificate,unixtime))
this standalone example prints:
1: 1560346130.0
2: 1560346243.0

There are many different ways to solve this problem.
One way is simply to check whether the line belongs to cert 1 or cert 2:
for line in output.splitlines():
if 'Certificate 1' in line: cert1Bool = True
if 'Certificate 2' in line: cert1Bool = False
Then carry on with the rest of your code inside the loop, checking cert1Bool wherever you need to know which certificate the current line belongs to.

I would just keep track of current certificate outside of the for loop.
e.g.:
# Remember the most recent "Certificate N:" header line so that every
# "Valid until" line can be attributed to the certificate it belongs to.
certificate = ''
for line in output.splitlines():
    if 'Certificate' in line:
        # Header line such as "Certificate 1:" -- it becomes the current state.
        # (The match is case-sensitive, so it must be spelled with a capital C.)
        certificate = line
    elif 'Valid until' in line:
        environment = certificate
        # Keep what follows "Valid until:" and drop the leading weekday ("Wed ").
        valid_until_time = (line.split(':', 1)[1]).strip()[4:]
        # The format string below has no time-zone field, so remove " EDT".
        valid_until_time = valid_until_time.replace(' EDT', '')
        unixtime = time.mktime(datetime.strptime(valid_until_time, '%b %d %H:%M:%S %Y').timetuple())
        send_to_zabbixsender(zabbix_executable,
                             zabbix_config,
                             item_key='{0}.expirydate'.format(environment),
                             item_value=unixtime)

Related

Date format conversion 'Wed Oct 20 16:42:04 +0000 2021' on Python

I wrote a script that receives data from Twitter. I am now splitting the data into columns, and I want the date and the time to be in separate columns.
I get a date like: Wed Oct 20 16:42:04 +0000 2021
I do it with the following code:
filtered_data['date'] = tweet['created_at']
Next, I want to convert this date at the time of receipt into two fields using datetime
date_formats = '%d-%m-%Y'
time_formats = '%H:%M:%S'
At the time of application:
filtered_data['date'] = datetime.strptime(tweet['created_at'], date_formats)
I get the following error:
time data 'Wed Oct 20 16:42:04 +0000 2021' does not match format '%d-%m-%Y'
How can I do this conversion, and is it possible to do it at all?
If you want a datetime object:
given
tweet['created_at'] = 'Wed Oct 20 16:42:04 +0000 2021'
use
input_format = '%a %b %d %H:%M:%S %z %Y'
filtered_data['date'] = datetime.strptime( \
tweet['created_at'], \
input_format)
results in
>>> filtered_data['date']
datetime.datetime(2021, 10, 20, 16, 42, 4, tzinfo=datetime.timezone.utc)
If you want a formatted string as '%d-%m-%Y':
given
tweet['created_at'] = 'Wed Oct 20 16:42:04 +0000 2021'
use
input_format = '%a %b %d %H:%M:%S %z %Y'
output_format = '%d-%m-%Y'
filtered_data['date'] = datetime.strptime( \
tweet['created_at'], \
input_format).strftime(output_format)
results in
>>> filtered_data['date']
'20-10-2021'
Reference
See python datetime.strptime and datetime.strftime
You need to parse the string using its current format first, and then format the result the way you want. See an example:
from datetime import datetime
my_date = 'Wed Oct 20 16:42:04 +0000 2021'
initial_format = '%a %b %d %H:%M:%S %z %Y'
final_format = '%d-%m-%Y'
new_date = datetime.strptime(my_date, initial_format).strftime(final_format)
print(new_date)
Output:
20-10-2021
So in your case, try:
filtered_data['date'] = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y').strftime(date_formats)

Convert string to date using datefinder

An issue occurs when I try to find a date in a .txt file using datefinder. I have the feeling I am unnecessarily switching between data types to obtain the result I want.
Below is an MWE that results in a generator object, which in turn is empty when converted to a list. I would like to obtain a datetime in the format %d-%m-%Y.
MWE:
import datefinder
f = ['this is text', 'this is a date', '* Model creation date: Sun Apr 25 08:52:06 2021']
for line in f:
if "creation date" in line:
date_line = str(line)
rev_date = datefinder.find_dates(_date_line)
dateutil's parser seems to do a better job:
# Import the parser submodule explicitly: a bare `import dateutil` is not
# guaranteed to make `dateutil.parser` available.
import dateutil.parser

f = ['this is text', 'this is a date', '* Model creation date: Sun Apr 25 08:52:06 2021']
dates = []
for line in f:
    try:
        # fuzzy=True lets the parser ignore the words around the date.
        dates.append(dateutil.parser.parse(line, fuzzy=True))
    except dateutil.parser.ParserError:
        # This line contains nothing that can be parsed as a date; skip it.
        pass
print(dates)
# [datetime.datetime(2021, 4, 25, 8, 52, 6)]
For the specific use-case:
for line in f:
if "* Model creation date:" in line:
rev_date = dateutil.parser.parse(line, fuzzy=True)
break
print(rev_date)
# 2021-04-25 08:52:06
It seems that datefinder.find_dates is sensitive to the : character. If you remove the : character after "creation date", you get the right result.
If your string always includes "creation date:", you can remove this substring after the if statement:
import datefinder

f = ['this is text', 'this is a date', '* Model creation date: Sun Apr 25 08:52:06 2021']
for line in f:
    if "creation date" in line:
        # Remove the label together with its colon so that only the date
        # text is handed to datefinder.
        date_line = line.replace('creation date:', '')
        rev_date = datefinder.find_dates(date_line)

Exception/Error Handling in Python

I have a class that I created, at the end of the task I have to create two lists: one for tweets and the other for tweet labels.
After initiation, I want to load the tweets from a file and from another file their labels. After loading I want to check that each tweet is a json object and that it has no error, if it does, then I want to remove it and remove the associated label if provided. Labels are either 'pos' or 'neg'.
class flu_tweets:
def __init__(self):
self.tweets = [] #init create empty tweet list
self.labels = [] #init create empty label list
def load(self, tweets_filename, labels_filename = ''):
open_tweet_file = open(tweets_filename, 'r')
for tweet in open_tweet_file:
if tweet !='\n' and tweet != '\r\n':
self.tweets.extend([tweet])
if labels_filename != '':
open_label_file = open(labels_filename, 'r')
for label in open_label_file:
if label != '\n' and label != '\r\n':
if label[3:] != '\n':
self.labels.extend([label[:3]])
else:
self.labels.extend([label])
open_label_file.close()
index_tweet = 0
for tweet in self.tweets:
try:
json.loads(tweet)
index_tweet += 1
break
except:
print(index_tweet-1)
self.tweets.pop(index_tweet-1)
if self.labels != []:
self.labels.pop(index_tweet-1)
open_tweet_file.close()
Right now the method doesn't do that, and upon checking the list it does contain non-json objects.
Below is a copy of text file used that has tweets in it:
{"created_at":"Fri Oct 20 14:35:19 +0000 2017","id":921384339421745153,"id_str":"921384339421745153","text":"RT #alvindchipmunk: Dont let the DNC slide with no handcuffs. https://t.co/h72q7lGAHF","source":"\u003ca href=\"http://twitter.com/download/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":3435638633,"id_str":"3435638633","name":"alwaystrump","screen_name":"rodilosso_patty","location":"New Jersey, USA","url":null,"description":"Let's not give the media the Race war they want. POTUS we have your back! MAGA","translator_type":"none","protected":false,"verified":false,"followers_count":2770,"friends_count":2048,"listed_count":183,"favourites_count":85745,"statuses_count":152520,"created_at":"Sat Aug 22 16:27:18 +0000 2015","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/773719268026257410/AuXU_l-D_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/773719268026257410/AuXU_l-D_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/3435638633/1461499638","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri Oct 20 12:27:04 +0000 
2017","id":921352064160034816,"id_str":"921352064160034816","text":"Dont let the DNC slide with no handcuffs. https://t.co/h72q7lGAHF","display_text_range":[0,41],"source":"\u003ca href=\"http://twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":35962023,"id_str":"35962023","name":"alvin maldonado","screen_name":"alvindchipmunk","location":"Bunnell, FL","url":"http://alvindchipmunk-theconservativecomet.blogspot.com/","description":"Artist-musician-Medical Professional-Patriot-Guns-God-Country & 2Unite with others of like mind 2 re-elect Trump, redecorate DC making d USA gr8 as it still is","translator_type":"none","protected":false,"verified":false,"followers_count":1660,"friends_count":2051,"listed_count":43,"favourites_count":2001,"statuses_count":16294,"created_at":"Tue Apr 28 02:43:18 +0000 2009","utc_offset":-14400,"time_zone":"Eastern Time (US & 
Canada)","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"9818A1","profile_background_image_url":"http://pbs.twimg.com/profile_background_images/627772771741732864/MHLgViA4.jpg","profile_background_image_url_https":"https://pbs.twimg.com/profile_background_images/627772771741732864/MHLgViA4.jpg","profile_background_tile":true,"profile_link_color":"981CEB","profile_sidebar_border_color":"DE3C88","profile_sidebar_fill_color":"E887E8","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/542852260422639616/75bqMWY3_normal.jpeg","profile_image_url_https":"https://pbs.twimg.com/profile_images/542852260422639616/75bqMWY3_normal.jpeg","profile_banner_url":"https://pbs.twimg.com/profile_banners/35962023/1481464103","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"quoted_status_id":920847040132792320,"quoted_status_id_str":"920847040132792320","quoted_status":{"created_at":"Thu Oct 19 03:00:17 +0000 2017","id":920847040132792320,"id_str":"920847040132792320","text":"I'm sick of all the evidence against the Democrats and no handcuffs. Retweet-\nif you agree!\n\n#realDonaldTrump \ud83c\uddfa\ud83c\uddf8","source":"\u003ca href=\"http://twitter.com/download/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":889982846428782592,"id_str":"889982846428782592","name":"c\u2113\u03b9\u03b7\u0442\u03c3\u03b7 \u043c\u03b9c\u043d\u03b1\u03b5\u2113","screen_name":"crusher614","location":"*not of this world","url":"https://www.youtube.com/channel/UCthNh_qChVqAh_zamo0IQxQ","description":"FMR U.S. 
Border Patrol | New Mexico SWAT Operator | Legend Who Lives Rent Free In The Minds of Liberals World Wide. #MAGA \u03bc\u03bf\u03bb\u1f7c\u03bd \u03bb\u03b1\u03b2\u03ad III","translator_type":"none","protected":false,"verified":false,"followers_count":19821,"friends_count":135,"listed_count":73,"favourites_count":16032,"statuses_count":7708,"created_at":"Tue Jul 25 22:57:00 +0000 2017","utc_offset":-25200,"time_zone":"America/Phoenix","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_link_color":"19CF86","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http://pbs.twimg.com/profile_images/916414292882219008/fvSIJCC6_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/916414292882219008/fvSIJCC6_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/889982846428782592/1506312325","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":551,"reply_count":703,"retweet_count":15439,"favorite_count":13676,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"realDonaldTrump","name":"Donald J. 
Trump","id":25073877,"id_str":"25073877","indices":[93,109]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en"},"is_quote_status":true,"quote_count":0,"reply_count":0,"retweet_count":1,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https://t.co/h72q7lGAHF","expanded_url":"https://twitter.com/crusher614/status/920847040132792320","display_url":"twitter.com/crusher614/sta\u2026","indices":[42,65]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"quoted_status_id":920847040132792320,"quoted_status_id_str":"920847040132792320","quoted_status":{"created_at":"Thu Oct 19 03:00:17 +0000 2017","id":920847040132792320,"id_str":"920847040132792320","text":"I'm sick of all the evidence against the Democrats and no handcuffs. Retweet-\nif you agree!\n\n#realDonaldTrump \ud83c\uddfa\ud83c\uddf8","source":"\u003ca href=\"http://twitter.com/download/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":889982846428782592,"id_str":"889982846428782592","name":"c\u2113\u03b9\u03b7\u0442\u03c3\u03b7 \u043c\u03b9c\u043d\u03b1\u03b5\u2113","screen_name":"crusher614","location":"*not of this world","url":"https://www.youtube.com/channel/UCthNh_qChVqAh_zamo0IQxQ","description":"FMR U.S. Border Patrol | New Mexico SWAT Operator | Legend Who Lives Rent Free In The Minds of Liberals World Wide. 
#MAGA \u03bc\u03bf\u03bb\u1f7c\u03bd \u03bb\u03b1\u03b2\u03ad III","translator_type":"none","protected":false,"verified":false,"followers_count":19821,"friends_count":135,"listed_count":73,"favourites_count":16032,"statuses_count":7708,"created_at":"Tue Jul 25 22:57:00 +0000 2017","utc_offset":-25200,"time_zone":"America/Phoenix","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_link_color":"19CF86","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http://pbs.twimg.com/profile_images/916414292882219008/fvSIJCC6_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/916414292882219008/fvSIJCC6_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/889982846428782592/1506312325","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":551,"reply_count":703,"retweet_count":15439,"favorite_count":13676,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"realDonaldTrump","name":"Donald J. 
Trump","id":25073877,"id_str":"25073877","indices":[93,109]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en"},"is_quote_status":true,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https://t.co/h72q7lGAHF","expanded_url":"https://twitter.com/crusher614/status/920847040132792320","display_url":"twitter.com/crusher614/sta\u2026","indices":[62,85]}],"user_mentions":[{"screen_name":"alvindchipmunk","name":"alvin maldonado","id":35962023,"id_str":"35962023","indices":[3,18]}],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1508510119668"}
{"created_at":"Fri Oct 20 14:35:19 +0000 2017","id":921384340113670149,"id_str":"921384340113670149","text":"RT #mitchelmusso: My girl is so fine! but gaw}
Now, that last tweet is incomplete, so I expect my function to raise an error and eliminate it.

How to calculate the total time a log file covers in Python 2.7?

So I have several log files, they are structured like this:
Sep 9 12:42:15 apollo sshd[25203]: pam_unix(sshd:auth): authentication failure; logname= uid=0 euid=0 tty=ssh ruser= rhost=189.26.255.11
Sep 9 12:42:15 apollo sshd[25203]: pam_succeed_if(sshd:auth): error retrieving information about user ftpuser
Sep 9 12:42:17 apollo sshd[25203]: Failed password for invalid user ftpuser from 189.26.255.11 port 44061 ssh2
Sep 9 12:42:17 apollo sshd[25204]: Received disconnect from 189.26.255.11: 11: Bye Bye
Sep 9 19:12:46 apollo sshd[30349]: Did not receive identification string from 199.19.112.130
Sep 10 03:29:48 apollo unix_chkpwd[4549]: password check failed for user (root)
Sep 10 03:29:48 apollo sshd[4546]: pam_unix(sshd:auth): authentication failure; logname= uid=0 euid=0 tty=ssh ruser= rhost=221.12.29.170 user=root
Sep 10 03:29:51 apollo sshd[4546]: Failed password for root from 221.12.29.170 port 56907 ssh2
There are more dates and times in the real files; this is just an example. I was wondering how I would calculate the total time that the file covers. I've tried a few things and have spent about 5 hours without success.
I tried this first, and it was close, but it didn't work like I wanted it to, it kept repeating dates:
with open(filename, 'r') as file1:
lines = file1.readlines()
for line in lines:
linelist = line.split()
date2 = int(linelist[1])
time2 = linelist[2]
print linelist[0], linelist[1], linelist[2]
if date1 == 0:
date1 = date2
dates.append(linelist[0] + ' ' + str(linelist[1]))
if date1 < date2:
date1 = date2
ttimes.append(datetime.strptime(str(ltime1), FMT) - datetime.strptime(str(time1), FMT))
time1 = '23:59:59'
ltime1 = '00:00:00'
dates.append(linelist[0] + ' ' + str(linelist[1]))
if time2 < time1:
time1 = time2
if time2 > ltime1:
ltime1 = time2
If the entries are in a chronological order, you can just look at the first and at the last entry:
# `lines` is the list returned by file1.readlines(). Drop blank lines so a
# trailing empty line is never mistaken for the last log entry.
entries = [line for line in lines if line.strip()]
# Everything before the host name "apollo" is the timestamp of the entry.
first_date = entries[0].split("apollo")[0]
last_date = entries[-1].split("apollo")[0]
We don't have the year, so I took the current year. Read all the lines, convert each month name to its month index, and parse each date.
Then sort the dates (so it works even if the log entries are out of order) and take the first and last items. Subtract. Enjoy.
from datetime import datetime
months = ["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
current_year = datetime.now().year
dates = list()
with open(filename, 'r') as file1:
for line in file1:
linelist = line.split()
if linelist: # filter out possible empty lines
linelist[0] = str(months.index(linelist[0])) # convert 3-letter months to index
date2 = int(linelist[1])
z=datetime.strptime(" ".join(linelist[0:3])+" "+str(current_year),"%m %d %H:%M:%S %Y") # compose & parse the date
dates.append(z) # store in list
dates.sort() # sort the list
first_date = dates[0]
last_date = dates[-1]
# print report & compute time span
print("start {}, end {}, time span {}".format(first_date,last_date,last_date-first_date))
result:
start 2016-09-09 12:42:15, end 2016-09-10 03:29:51, time span 14:47:36
Note that it won't work properly across December 31st and January 1st because of the missing year information. I suppose we could make a guess: if we find both January and December in the log, assume that the January entries belong to the next year. This is not supported yet.

Python time.strptime() gives wrong result?

I'm trying to parse a mbox format email spool.
I have code that does this:
if string.find(line, 'Date: ') == 0:
try:
when = time.mktime(time.strptime(line[6:30], "%a, %d %b %Y %H:%M:%S"))
Usually it seems to work OK, except that when line = 'Date: Sat, 17 Apr 2004 22:29:37 -0400\n'
it seems to give the wrong result (22:29:03 instead of 22:29:37).
Here's my pdb trace:
(Pdb) p line
'Date: Sat, 17 Apr 2004 22:29:37 -0400\n'
(Pdb) p time.strptime(line[6:30], "%a, %d %b %Y %H:%M:%S")
time.struct_time(tm_year=2004, tm_mon=4, tm_mday=17, tm_hour=22, tm_min=29, tm_sec=3, tm_wday=5, tm_yday=108, tm_isdst=-1)
(Pdb)
The result seems to be off by 34 seconds. What am I doing wrong?
You are slicing your line too short; the second value is exclusive, not inclusive:
>>> line[6:30]
'Sat, 17 Apr 2004 22:29:3'
>>> line[6:31]
'Sat, 17 Apr 2004 22:29:37'
>>> time.strptime(line[6:31], "%a, %d %b %Y %H:%M:%S")
time.struct_time(tm_year=2004, tm_mon=4, tm_mday=17, tm_hour=22, tm_min=29, tm_sec=37, tm_wday=5, tm_yday=108, tm_isdst=-1)

Categories