I have just started programming, and Python is the first language I have learned. This program can already extract data from a .txt file and send it to an API.
However, I don't know how to delete the file after its data has been extracted and sent to the API. Here is my code:
from fileinput import close
import os
import requests
from datetime import datetime
import glob
import time
data_send_list = []
path = "./file"
'''File List'''
file_name = []
URL = 'http://.......'
def main():
print("Main Def" "\n")
data_send_list = scan_files(path)
for json in data_send_list:
send_api(URL, json)
def read_text_file(file_path):
with open (file_path, 'r') as file:
data = file.readlines()
return data_dictionary
def scan_files(path):
list = []
for file in glob.glob("*.txt"):
return list
def send_api(url, json):
    """POST ``json`` to ``url`` and report whether the API accepted it.

    Args:
        url: Endpoint that receives the POST request.
        json: Payload passed to ``requests`` as the ``data=`` argument.

    Returns:
        The HTTP status code of the response.
    """
    # The session is only needed for this one request; the context manager
    # closes its connections instead of leaking them.
    with requests.session() as requests_session:
        post_api = requests_session.post(url, data=json)
    print("Sending API")
    if 200 <= post_api.status_code < 300:
        print("Successful. Status code: ", post_api.status_code)
        # This is the point where the source file could be deleted, once its
        # path is passed in by the caller.
    else:
        # Only reported for non-2xx responses (previously printed always).
        print("Failed to send to API. Status code: ", post_api.status_code)
    return post_api.status_code
I was hoping that if the data is sent to the API successfully (for example, the output is "status code: 200"), the data file will be deleted, while files whose data could not be sent will remain.
There would be a lot of better ways other than my answer.
import os
def send_api(url, json, path):
    """POST ``json`` to ``url`` and delete ``path`` when the API accepts it.

    Args:
        url: Endpoint that receives the POST request.
        json: Payload passed to ``requests`` as the ``data=`` argument.
        path: File the payload was read from; removed only after a 2xx reply.

    Returns:
        The HTTP status code of the response.

    Raises:
        OSError: If the file at ``path`` cannot be removed.
    """
    # The session is only needed for this one request; the context manager
    # closes its connections instead of leaking them.
    with requests.session() as requests_session:
        post_api = requests_session.post(url, data=json)
    print("Sending API")
    if 200 <= post_api.status_code < 300:
        print("Successful. Status code: ", post_api.status_code)
        os.remove(path)  # the data was delivered, so the file can go
    else:
        # On failure the file is kept so that it can be sent again later.
        print("Failed to send to API. Status code: ", post_api.status_code)
    return post_api.status_code
I am new to Python and trying to understand how to automate things. I have a folder in which 5 CSV files get updated daily; however, sometimes one or two of them are not updated on a particular day. At the moment I have to check this folder manually. Instead, I want to automate this so that if a CSV file has not been updated in the last 24 hours, an email is sent to me alerting me of this.
My code:
import datetime
import glob
import os
import smtplib
import string
now = datetime.datetime.today()  # current local time (naive)
list_of_files = glob.glob('c:/Python/*.csv')  # every CSV file in the watched folder
# File with the greatest getctime value, i.e. the most recent one.
latest_file = max(list_of_files, key=os.path.getctime)
# fromtimestamp() yields local time, matching `now`. The previous
# utcfromtimestamp() produced UTC, so `dif` was off by the UTC offset.
newestFileCreationDate = datetime.datetime.fromtimestamp(os.path.getctime(latest_file))
dif = now - newestFileCreationDate  # age of the most recent file as a timedelta
logFile = "c:/Python/log.log"  # log file that checkFolder appends to
def checkFolder(dif, now, logFile):
    """Send an alert mail if ``dif`` exceeds one day, then append a log line.

    Args:
        dif: ``datetime.timedelta`` age of the most recent file.
        now: Timestamp written at the start of the log line and the mail text.
        logFile: Path of the log file; opened in append mode.
    """
    if dif > datetime.timedelta(days=1):
        HOST = ""  # This must be your smtp server ip
        SUBJECT = "Alert! At least 1 day wthout a new file in folder xxxxxxx"
        TO = "xx.t#gmail.com"
        FROM = "xx.t#gmail.com"
        text = "%s - The oldest file in folder it's %s old " % (now, dif)
        # str.join replaces the Python 2-only string.join(); the empty element
        # separates the headers from the body, which now carries `text`.
        BODY = "\r\n".join((
            "From: %s" % FROM,
            "To: %s" % TO,
            "Subject: %s" % SUBJECT,
            "",
            text,
        ))
        # The context manager sends QUIT and closes the socket even on error.
        with smtplib.SMTP(HOST) as server:
            server.sendmail(FROM, [TO], BODY)
        line = "%s - [WARNING] The oldest file in folder it's %s old \n" % (now, dif)
    else:
        # The most recent file is at most one day old.
        line = "%s - [OK] The oldest file in folder it's %s old \n" % (now, dif)
    # `with` guarantees the log line is flushed and the handle is closed.
    with open(logFile, "a") as file:
        file.write(line)
checkFolder(dif,now,logFile) #Call function and pass 3 arguments defined before
However, this does not run without errors. I just want to be notified by mail about the files in the folder that have not been updated, whether that is one of the five files or all five of them.
Use pure python and concise way
import hashlib
import glob
import json
import smtplib
from email.message import EmailMessage
import time
import schedule #pip install schedule
hasher = hashlib.md5()
size = 65536 #to read large files in chunks
list_of_files = glob.glob('./*.csv') #absolute path for crontab
Part 1) Run this script first then comment it out. It will create a json file with hashes of your files.
first_hashes = {}
for x in list_of_files:
with open(x, 'rb') as f:
buf = f.read(size)
while len(buf) > 0:
buf = f.read(size)
first_hashes[x] = hasher.hexdigest()
with open('hash.json', 'w') as file:
file.write(json.dumps(first_hashes, indent=2))
Now comment it out or even delete it.
Part 2) Automation script:
def send_email():
check_hash = {} #Contain hashes that have not changed
with open('hash.json') as f: #absolute path for crontab
data = json.load(f)
for x in list_of_files:
with open(x, 'rb') as f:
buf = f.read(size)
while len(buf) > 0:
buf = f.read(size)
new_hash = hasher.hexdigest()
#if a hash match with one in data, that file has not changed
if new_hash in data.values():
check_hash[x] = new_hash
data[x] = new_hash
#update our hashes
with open('hash.json', 'w') as file: #absolute path for crontab
file.write(json.dumps(data, indent=2))
if len(check_hash) > 0: #check if there's anything in check_hash
filename="check_hash.txt" #absolute path for crontab
#write to a text file named "check_hash.txt"
with open(filename, 'w') as f: #absolute path for crontab
f.write(json.dumps(check_hash, indent=2))
# for gmail smtp setup watch youtu.be/JRCJ6RtE3xU
EMAIL_ADDRESS = 'SMTPAddress#gmail.com'
msg = EmailMessage()
msg['Subject'] = 'Unupdated files'
msg['From'] = EMAIL_ADDRESS
msg['To'] = 'receive#gmail.com'
msg.set_content('These file(s) did not update:')
msg.add_attachment(open(filename, "r").read(), filename=filename)
with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
#for faster testing check other options here github.com/dbader/schedule
while 1:
EDIT: If you restart your pc, you will need to run this file again to restart schedule, to avoid that, you can use crontab as follows (learn how from youtu.be/j-KgGVbyU08):
# mm hh DOM MON DOW command
30 10 * * * python3 path-to-file/email-script.py #Linux
30 10 * * * python path-to-file/email-script.py #Windows
This will run the script everyday at 10:30 AM IF the pc is ON at that time. For faster testing (run every 1 minute) use:
* * * * * python3 path-to-file/email-script.py
NOTE: If you gonna use crontab, you MUST use absolute path for all file references and replace
while 1:
if __name__ == "__main__":
Tested and it's working great!
Granted, I don't know much about CSV, but I would import the time module and use the time.sleep function to create a timer. You can set a variable once the time is up and check that variable in an if statement: when it reaches the expected value, send the email.
Are you thinking of something like this?
import os
from datetime import datetime
import smtplib
import textwrap
def send_email_failure():
    """Send an alert mail reporting the age of the oldest file.

    Reads the module-level ``oldest_time_hour`` for the mail text. The SMTP
    server and the addresses are placeholders that must be filled in.
    """
    SERVER = ""  # This must be your smtp server ip
    SUBJECT = "Alert! At least 1 day without a new file in folder xxxxxxx"
    # A list, so that ", ".join(TO) joins addresses rather than characters.
    TO = ["xx.t#gmail.com"]
    FROM = "xx.t#gmail.com"
    TEXT = "%s - The oldest file in folder it's %sh old " % (datetime.now(), oldest_time_hour)
    # Headers, a blank separator line, then the body. The previous template
    # had three placeholders for four values and raised TypeError.
    message = "\r\n".join((
        "From: %s" % FROM,
        "To: %s" % ", ".join(TO),
        "Subject: %s" % SUBJECT,
        "",
        TEXT,
    ))
    # Send the mail; the context manager closes the connection afterwards.
    with smtplib.SMTP(SERVER) as server:
        server.sendmail(FROM, TO, message)
def save_log(logFile, ok_or_failure, time_now, delta):
    """Append exactly one status line to the log file.

    Args:
        logFile: Path of the log file; opened in append mode.
        ok_or_failure: ``'ok'`` writes an ``[OK]`` line, any other value a
            ``[WARNING]`` line.
        time_now: Timestamp placed at the start of the line.
        delta: Age of the oldest file, inserted into the message.
    """
    if ok_or_failure != 'ok':
        line = "%s - [WARNING] The oldest file in folder it's %s old \n" % (time_now, delta)
    else:
        # Previously this line was written in addition to the warning.
        line = "%s - [OK] The oldest file in folder it's %s old \n" % (time_now, delta)
    # `with` closes the handle so that the line is flushed to disk.
    with open(logFile, "a") as file:
        file.write(line)
def check_file(filename):
    """Return how many whole hours ago a ``.csv`` file was last modified.

    Args:
        filename: Path of the file to inspect.

    Returns:
        Whole hours since the last modification, or ``0`` when ``filename``
        does not end in ``.csv``.
    """
    if not filename.endswith('.csv'):
        return 0
    try:
        mtime = os.path.getmtime(filename)  # get modified time
    except OSError:
        # An unreadable file counts as modified at the epoch, i.e. very old.
        mtime = 0
    # Use the full elapsed time. `timedelta.seconds` drops whole days, which
    # capped the result at 23 and made a "> 24" check impossible to satisfy.
    elapsed_seconds = datetime.now().timestamp() - mtime
    # Clamp so that a modification time in the future does not go negative.
    return max(0, int(elapsed_seconds // 3600))
# we check what files are in the dir 'files'
# and their modification time
oldest_time_hour = 0
for path, dirs, files in os.walk('./files'):  # this need to be modified by case
    for file in files:
        # Age in hours of each file; check_file returns 0 for non-CSV files.
        age_hours = check_file(path + '/' + file)
        # Keep the largest age seen so far. The local is no longer called
        # `time`, which shadowed the standard module name.
        if age_hours > oldest_time_hour:
            oldest_time_hour = age_hours
# Log exactly one line: a failure when the oldest file is older than 24h,
# otherwise ok (previously both lines were written on failure).
if oldest_time_hour > 24:
    save_log('log.log', 'failure', datetime.now(), oldest_time_hour)
    # NOTE(review): send_email_failure() is defined above but never called;
    # invoke it here once the SMTP settings are filled in.
else:
    save_log('log.log', 'ok', datetime.now(), oldest_time_hour)
You will also need an endless loop to keep the Python script running, or a cron job that runs the script every hour or so.
Why are you checking the last_modified_date? I suggest you to check the modification of the file with md5 checksum.
My Idea is, if you have following files :
You can check their md5 checksum and write the result + DateTime into a file next to the original file. like following :
Content of file1.csv_checksum
you can check md5 of a file with following code:
>>> import hashlib
>>> hashlib.md5(open('filename.exe','rb').read()).hexdigest()
then you can check the result with the provided one in the checksum file ( and if the checksum file does not exist, just create it for the first time )
I think you can easily handle it with this approach.
At first i started with a task scheduler decorator which will enable you to poll a directory for a fixed delay:
import time
import functools
def scheduled(fixed_delay):
    """Decorate a method so that each call is followed by a fixed pause.

    Args:
        fixed_delay: Name of the attribute on the method's instance (the
            first positional argument) that holds the delay in seconds.

    Returns:
        A decorator producing a wrapper that calls the method, sleeps for the
        configured delay and then returns the method's result.
    """
    def decorator_scheduled(func):
        @functools.wraps(func)  # keep the wrapped method's name and docstring
        def wrapper_schedule(*args, **kwargs):
            result = func(*args, **kwargs)
            self = args[0]
            # Looked up on every call, so changing the attribute at runtime
            # changes the polling interval.
            delay = getattr(self, fixed_delay)
            time.sleep(delay)  # the delay was computed but never applied
            return result
        return wrapper_schedule
    return decorator_scheduled
Saved it as a seperate module named task_scheduler.py.
I will use it in my file watcher:
import os
from task_scheduler import scheduled
import smtplib, ssl
class FileWatcher:
def __init__(self,
self.files_path = files_path
self.extension = extension
self.poll_delay = poll_delay
def notify_host_on_nonchange(self, file_path):
port = 465
smtp_server = "smtp.gmail.com"
sender_email = "sender#gmail.com"
receiver_email = "receiver#gmail.com"
password = "Your password here" #You may want to read it from file
message = f"No change in file: {file_path} for 24 hurs!"
context = ssl.create_default_context()
with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:
server.login(sender_email, password)
server.sendmail(sender_email, receiver_email, message)
def watch(self):
while True:
except KeyboardInterrupt:
log.debug('Polling interrupted by user.')
def poll_(self,):
for f in os.listdir(self.files_path):
full_path = os.path.join(self.files_path, f)
path_stat = os.stat(full_path)
_, file_ext = os.path.splitext(f)
ctime = path_stat.st_ctime
diff = time.time() - ctime/3600
if diff<=24 or not S_ISREG(path_stat.st_mode) or str(file_ext) != self.extension:
if __name__ == "__main__":
file_listener = FileWatcher()
The class above defines a poll_ function which uses os.stat to read each file's change time (st_ctime). If that time is at most 24 hours ago, or the path is not a regular file (for example, it is a directory), or it does not have the extension you are looking for, polling skips it; otherwise it calls the notify function to send an e-mail. The example uses the Gmail SMTP server, but you can change it as appropriate for your environment. The watch function is a wrapper for continuous polling.
This class is adapted from my machine learning model watcher and loader, you can access that version and project from my github. For further explanation about decorator and script you can check out my medium post.
I wrote a bot that fetches posts from Reddit and posts them on a Twitter account. But sometimes, and I don't know why, it posts twice in a row instead of once every 3 hours. I suspect that it is because I did something like this:
do stuff:
if stuff doesnt already exist:
do other stuff
do stuff
I really think this is bad practice, but I can't figure out how else I can let it run in an infinite loop while still trying to get a post that hasn't been posted before.
I have two points in my code where I "re-run" the entire code after a check. One is when the post fetched from Reddit is not an image; the other is when the fetched post was already posted before (its ID is stored in a JSON file for exactly this check).
I hope someone understands what i mean, thanks in advance.
import time
import tweepy
import datetime
import praw
import urllib.request
import os
import json
def Mainbot():
reddit = praw.Reddit(client_id='X',
user_agent='RedditFetchBot by FlyingThunder')
def Tweet(postinfo):
auth = tweepy.OAuthHandler("X", "X")
auth.set_access_token("X", "X")
api = tweepy.API(auth)
api.update_with_media("local-filename.jpg", postinfo)
print("not a file post")
Mainbot() #check 1
post = reddit.subreddit('okbrudimongo').random()
x = post.id
with open('data.json', 'r') as e:
eread = e.read()
if x not in eread:
with open('data.json', 'a') as f:
json.dump(x, f)
print("already posted")
Mainbot() #check 2
print(post.url + " " + post.title)
urllib.request.urlretrieve(post.url, "local-filename.jpg")
Tweet(postinfo=post.title+" (https://www.reddit.com" + post.permalink+")")
print("Datei nicht vorhanden")
def loop():
print("still running")
while True:
By the way, here is what it gives back - i made print checks to see what goes wrong, here you can see what it says when it posts twice
still running
2019-09-24 13:27:23.437152
still running
2019-09-24 13:57:23.437595
already posted
https://i.redd.it/xw38s1qrmlh31.jpg Führ Samstag bai ihm
https://i.redd.it/nnaxll9gjwf31.jpg Sorri Mamer
still running
2019-09-24 14:27:39.913651
still running
2019-09-24 14:57:39.913949
still running
2019-09-24 15:27:39.914013
There's quite a bit to unpack here.
if x not in eread:
Mainbot() # <--- this line
in the above snippet, you check if the post.id is already in your file. and if it is, you call the function Mainbot() again which means it has another chance to post a tweet.
However, this line
Tweet(postinfo=post.title+" (https://www.reddit.com" + post.permalink+")")
occurs outside of your if-else check, which means it will post a tweet regardless of whether or not the post.id was in your file.
I also want to address your method of looping the bot. Your use of recursion is causing your double-posting issue, and it could technically post many tweets at once if multiple posts in a row end up in the "else" branch listed above.
Also, if you are using `with open(...) as f:`, you don't need to call `f.close()`.
Here is a solution I came up with that should solve your problem and doesn't use recursion:
import time
import tweepy
import datetime
import praw
import urllib.request
import os
import json
def initBot():
# this function logs into your reddit and twitter accounts
# and returns their instances
reddit = praw.Reddit(client_id='XXXX',
user_agent='RedditFetchBot by FlyingThunder')
auth = tweepy.OAuthHandler("XXXX", "XXXX")
twitter = tweepy.API(auth)
return reddit, twitter
def Tweet(post):
# this function simply tries to post a tweet
postinfo = post.title + " (https://www.reddit.com" + post.permalink + ")"
twitter.update_with_media("local-filename.jpg", postinfo)
print("not a file post"+post.permalink)
def Mainbot():
while True:
with open('data.json', 'r+') as e: # 'r+' let's you read and write to a file
eread = e.read()
# This section loops until it finds a reddit submission
# that's not in your file
post = reddit.subreddit('okbrudimongo').random()
x = post.id
while x in eread:
post = reddit.subreddit('okbrudimongo').random()
x = post.id
# add the post.id to the file
json.dump(x, e)
print(post.url + " " + post.title)
# Get and tweet image
urllib.request.urlretrieve(post.url, "local-filename.jpg")
# Remove image file
print("Datei nicht vorhanden")
# sleep for a total of three hours, but report status every 30 minutes
for i in range(6):
print("still running")
if __name__ == "__main__":
reddit, twitter = initBot()
I haven't tested this because I don't have twitter keys.
Solution i found (i still dont fully understand what mechanic caused the bug):
import time
import tweepy
import datetime
import praw
import urllib.request
import os
import json
def Mainbot():
reddit = praw.Reddit(client_id='XXXX',
user_agent='RedditFetchBot by FlyingThunder')
def Tweet(postinfo):
auth = tweepy.OAuthHandler("XXXX", "XXXX")
api = tweepy.API(auth)
api.update_with_media("local-filename.jpg", postinfo)
print("not a file post"+post.permalink)
post = reddit.subreddit('okbrudimongo').random()
x = post.id
with open('data.json', 'r') as e:
eread = e.read()
if x not in eread:
with open('data.json', 'a') as f:
json.dump(x, f)
print(post.url + " " + post.title)
urllib.request.urlretrieve(post.url, "local-filename.jpg")
Tweet(postinfo=post.title + " (https://www.reddit.com" + post.permalink + ")")
print("Datei nicht vorhanden")
print("already posted")
def loop():
print("still running")
while True:
I have a url :"https://findicons.com/files/icons/2787/beautiful_flat_icons/128/running.png"
I want to get the image and write it to file , i write the code as follow:
import urllib.request
web = urllib.request.urlopen(iturl)
itdata = web.read()
f = open(str(cou) + '.png', "wb")
cou = cou + 1
My question is: if I have many URLs to download, how can I implement this with Tornado coroutines?
This isn't the entire code, just something I came up with in 5 mins but it should give you enough information to satisfy your requirements. If you have any questions or further explanation is required, please let me know.
from tornado import gen, httpclient, ioloop
def main():
client = httpclient.AsyncHTTPClient()
response = yield client.fetch(
follow_redirects = True)
def download_image(response):
    """Write the body of a fetched response to a file in the working directory.

    Args:
        response: Object exposing ``request.url`` (used to derive the file
            name) and ``buffer``, a binary file-like object with the body.
    """
    buffer_size = 1024
    filename = response.request.url.split('/')[-1]  # this is not always reliable
    # 'wb' rather than 'ab': downloading the same URL again must replace the
    # file, not append a second copy of the image to it.
    with open(filename, 'wb') as img:
        while True:
            chunk = response.buffer.read(buffer_size)
            # read() returns b'' at end of data. Comparing with the str ''
            # is never true on Python 3, so the loop never ended.
            if not chunk:
                break
            img.write(chunk)
Tornado Issue #1616
Examples of RequestHandler
I'm trying to write out the JSON from the Kik smiley site so that I won't have to write it manually. I need to parse the JSON so that only some of the existing fields end up in the file (basically cleaning it). What I need from the site is (name, id, type). How would I do this?
I have written this in Python, but it seems to fail, and I'm not 100% sure why. I am new to Python, so sorry if this is an obvious question! I did find something earlier, but it just confused me even more :) Thank you!
import requests, json, sys
from colorama import init
from termcolor import colored
class SmileyGrabber():
def __init__(self):
# requests vars
self.smileysFound = 0
self.smileysLost = 0
self.session = requests.Session()
def grabSmiley(self):
while True:
r = self.session.get(self.url)
j = r.json()
if j["IsSuccess"] == True:
meta = j["smileys"]
sID = meta["id"]
sType = meta["type"]
sName = meta["name"]
FormatSmileyData(sID, sType, sName)
print "Smiley Found:", colored("({0})".format(sName), "cyan")
self.smileysFound += 1
print(colored("Could not grab smiley"), "red")
self.smileysLost += 1
except KeyboardInterrupt:
print r.text
class FormatSmileyData(object):
def __init__(self, sID, sType, sName):
smileyData = {}
data = []
data.append({"SMILEY_ID":sID, "SMILEY_TYPE":sType, "SMILEY_NAME":sName})
dataFile = open("smileys.json", "a+")
if __name__ == "__main__":
There are a number of problems with your code.
It will be more efficient to read from the network all at once
rather than making a call to session.get for each smiley.
j does not have an "IsSuccess" element, so that will never be true
j["smileys"] is a list, so to get the dictionaries (which represent each smiley) you will need to iterate through that list.
You are appending data into data but you are writing from
smileyData, which never has any data entered into it.
Each time you call the FormatSmileyData constructor, you are
resetting the data.
Take a look at a tool like Postman to prettify the JSON so you can see the structure. This can help figure out how to parse it.
Here's an updated version of your script that appears to work:
I removed the colorization and made it work with Python 3.
import requests, json, sys
class SmileyGrabber():
def __init__(self):
# requests vars
self.smileysFound = 0
self.smileysLost = 0
self.session = requests.Session()
self.data = []
def grabSmiley(self):
r = self.session.get(self.url)
j = r.json()
print ("got json")
print (str(len(j)))
for element in j:
for meta in element["smileys"]:
print ("---------------")
print (str(meta))
sID = meta["id"]
sType = meta["type"]
sName = meta["name"]
self.addSmileyData(sID, sType, sName)
print ("Smiley Found:" + "({0})".format(sName))
self.smileysFound += 1
print ("found " + str(self.smileysFound))
print("Could not grab smiley")
self.smileysLost += 1
def addSmileyData(self, sID, sType, sName):
self.data.append({"SMILEY_ID":sID, "SMILEY_TYPE":sType, "SMILEY_NAME":sName})
def writeSmileyData(self):
dataFile = open("smileys.json", "a+")
if __name__ == "__main__":
I’m trying to make a program that will do the following:
check if auth_file exists
if yes -> read file and try to login using data from that file
- if data is wrong -> request new data
if no -> request some data and then create the file and fill it with requested data
So far:
import json
import getpass
import os
import requests
filename = ".auth_data"
auth_file = os.path.realpath(filename)
url = 'http://example.com/api'
headers = {'content-type': 'application/json'}
def load_auth_file():
f = open(auth_file, "r")
auth_data = f.read()
r = requests.get(url, auth=auth_data, headers=headers)
if r.reason == 'OK':
return auth_data
print "Incorrect login..."
except IOError:
f = file(auth_file, "w")
def req_auth():
user = str(raw_input('Username: '))
password = getpass.getpass('Password: ')
auth_data = (user, password)
r = requests.get(url, auth=auth_data, headers=headers)
if r.reason == 'OK':
return user, password
elif r.reason == "FORBIDDEN":
print "Incorrect login information..."
return False
I have the following problems(understanding and applying the correct way):
I can't find a correct way of storing the data returned from req_auth() in auth_file in a format that can be read and used in load_auth_file.
PS: Of course, I'm a beginner in Python and I'm sure I have missed some key elements here :(
To read and write data, you can use json:
>>> with open('login.json','w') as f:
f.write(json.dumps({'user': 'abc', 'pass': '123'}))
>>> with open('login.json','r') as f:
>>> print data
{u'user': u'abc', u'pass': u'123'}
A few improvements I'd suggest:
Have a function that tests login (arguments: user,pwd) and returns True/False
Save data inside req_data, because req_data is called only when you have incorrect/missing data
Add an optional argument tries=0 to req_data, and test against it for a maximum number of tries
def check_login(user, pwd):
    """Return True when the API answers the credentials with reason 'OK'.

    Args:
        user: User name sent via the ``auth`` argument of ``requests.get``.
        pwd: Password sent alongside ``user``.

    Returns:
        ``True`` if the response reason is ``'OK'``, otherwise ``False``.
    """
    r = requests.get(url, auth=(user, pwd), headers=headers)
    # The trailing colon after the expression was a syntax error.
    return r.reason == 'OK'
for (2), you can use json (as described above), csv, etc. Both of those are extremely easy, though json might make more sense since you're already using it.
for (3):
def req_auth(tries = 0) #accept an optional argument for no. of tries
#your existing code here
if check_login(user, password):
#Save data here
if tries<3: #an exit condition and an error message:
req_auth(tries+1) #increment no. of tries on every failed attempt
print "You have exceeded the number of failed attempts. Exiting..."
There are a couple of things I would approach differently, but you're off to a good start.
Instead of trying to open the file initially, I'd check for its existence:
if not os.path.isfile(auth_file):
Next, when you're working with writing the output you should use context managers:
with open(auth_file, 'w') as fh:
And finally, as a storage option (not terribly secure), it might work well to put the information you're saving in JSON format:
userdata = dict()
userdata['username'] = raw_input('Username: ')
userdata['password'] = getpass.getpass('Password: ')
# saving
with open(auth_file, 'w') as fho:
# loading
with open(auth_file) as fhi:
userdata = json.loads(fhi.read())