I wrote a bot that fetches posts from Reddit and posts them on a Twitter account. But sometimes, I don't know why, it posts twice in a row instead of once every 3 hours. I suspect it is because I did something like this:
do stuff:
    if stuff doesn't already exist:
        do other stuff
    else:
        do stuff
And I really think it's bad practice, but I can't figure out how else to keep it running in an infinite loop while still trying to get a post that hasn't been posted before.
There are two points in my code where I "re-run" the entire code after a check: one when the post fetched from Reddit is not an image, the other when the fetched post was already posted before (post IDs are stored in a JSON file for exactly this check).
I hope someone understands what I mean. Thanks in advance.
import time
import tweepy
import datetime
import praw
import urllib.request
import os
import json


def Mainbot():
    reddit = praw.Reddit(client_id='X',
                         client_secret='X',
                         user_agent='RedditFetchBot by FlyingThunder')

    def Tweet(postinfo):
        auth = tweepy.OAuthHandler("X", "X")
        auth.set_access_token("X", "X")
        api = tweepy.API(auth)
        try:
            api.update_with_media("local-filename.jpg", postinfo)
        except:
            print("not a file post")
            Mainbot()  # check 1

    post = reddit.subreddit('okbrudimongo').random()
    x = post.id
    with open('data.json', 'r') as e:
        eread = e.read()
        if x not in eread:
            with open('data.json', 'a') as f:
                json.dump(x, f)
                f.close()
            e.close()
        else:
            e.close()
            print("already posted")
            Mainbot()  # check 2

    print(post.url + " " + post.title)
    urllib.request.urlretrieve(post.url, "local-filename.jpg")
    Tweet(postinfo=post.title + " (https://www.reddit.com" + post.permalink + ")")
    try:
        time.sleep(5)
        os.remove("local-filename.jpg")
    except:
        print("Datei nicht vorhanden")


def loop():
    time.sleep(1800)
    print("still running")
    print(datetime.datetime.now())


while True:
    Mainbot()
    loop()
    loop()
    loop()
    loop()
    loop()
    loop()
By the way, here is what it outputs. I added print checks to see what goes wrong; here you can see what it prints when it posts twice:
still running
2019-09-24 13:27:23.437152
still running
2019-09-24 13:57:23.437595
already posted
https://i.redd.it/xw38s1qrmlh31.jpg Führ Samstag bai ihm
https://i.redd.it/nnaxll9gjwf31.jpg Sorri Mamer
still running
2019-09-24 14:27:39.913651
still running
2019-09-24 14:57:39.913949
still running
2019-09-24 15:27:39.914013
There's quite a bit to unpack here.
if x not in eread:
    ...
else:
    ...
    Mainbot() # <--- this line
In the above snippet, you check whether the post.id is already in your file, and if it is, you call the function Mainbot() again, which means it gets another chance to post a tweet.
However, this line
Tweet(postinfo=post.title+" (https://www.reddit.com" + post.permalink+")")
occurs outside of your if-else check, which means it will post a tweet regardless of whether or not the post.id was in your file.
I also want to address your method of looping the bot. Your use of recursion is what causes your double-posting issue, and it could technically recurse into posting many tweets at once if several posts in a row end up in the "else" branch listed above.
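To see the mechanics, here is a minimal sketch (a hypothetical stand-in, not your actual bot) of why the recursive call ends in two tweets:

def mainbot_sketch(already_posted):
    # stand-in for "check 2": on a duplicate, the bot calls itself again
    if already_posted:
        print("already posted")
        mainbot_sketch(False)   # the inner call tweets once...
    print("tweeting")           # ...then control returns here and the outer call tweets again

mainbot_sketch(True)            # prints "tweeting" twice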
Also, if you are using with open(...) as f: you don't need to call f.close(); the context manager closes the file for you when the block ends.
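For example, with your data.json file:

# the with statement closes the file automatically when the block ends,
# even if an exception is raised inside it
with open('data.json', 'r') as f:
    eread = f.read()
# f is already closed here, so f.close() is unnecessary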
Here is a solution I came up with that should solve your problem and doesn't use recursion:
import time
import tweepy
import datetime
import praw
import urllib.request
import os
import json


def initBot():
    # this function logs into your reddit and twitter accounts
    # and returns their instances
    reddit = praw.Reddit(client_id='XXXX',
                         client_secret='XXXX',
                         user_agent='RedditFetchBot by FlyingThunder')
    auth = tweepy.OAuthHandler("XXXX", "XXXX")
    auth.set_access_token("XXXX",
                          "XXXX")
    twitter = tweepy.API(auth)
    return reddit, twitter


def Tweet(post):
    # this function simply tries to post a tweet
    postinfo = post.title + " (https://www.reddit.com" + post.permalink + ")"
    try:
        twitter.update_with_media("local-filename.jpg", postinfo)
    except:
        print("not a file post" + post.permalink)


def Mainbot():
    while True:
        with open('data.json', 'r+') as e:  # 'r+' lets you read and write to a file
            eread = e.read()
            # This section loops until it finds a reddit submission
            # that's not in your file
            post = reddit.subreddit('okbrudimongo').random()
            x = post.id
            while x in eread:
                post = reddit.subreddit('okbrudimongo').random()
                x = post.id
            # add the post.id to the file
            json.dump(x, e)
        print(post.url + " " + post.title)
        # Get and tweet image
        urllib.request.urlretrieve(post.url, "local-filename.jpg")
        Tweet(post)
        # Remove image file
        try:
            time.sleep(5)
            os.remove("local-filename.jpg")
        except:
            print("Datei nicht vorhanden")
        # sleep for a total of three hours, but report status every 30 minutes
        for i in range(6):
            time.sleep(1800)
            print("still running")
            print(datetime.datetime.now())


if __name__ == "__main__":
    reddit, twitter = initBot()
    Mainbot()
I haven't tested this because I don't have twitter keys.
Solution I found (I still don't fully understand what mechanic caused the bug):
import time
import tweepy
import datetime
import praw
import urllib.request
import os
import json


def Mainbot():
    reddit = praw.Reddit(client_id='XXXX',
                         client_secret='XXXX',
                         user_agent='RedditFetchBot by FlyingThunder')

    def Tweet(postinfo):
        auth = tweepy.OAuthHandler("XXXX", "XXXX")
        auth.set_access_token("XXXX",
                              "XXXX")
        api = tweepy.API(auth)
        try:
            api.update_with_media("local-filename.jpg", postinfo)
        except:
            print("not a file post" + post.permalink)
            Mainbot()

    post = reddit.subreddit('okbrudimongo').random()
    x = post.id
    with open('data.json', 'r') as e:
        eread = e.read()
        if x not in eread:
            with open('data.json', 'a') as f:
                json.dump(x, f)
                f.close()
            e.close()
            print(post.url + " " + post.title)
            urllib.request.urlretrieve(post.url, "local-filename.jpg")
            Tweet(postinfo=post.title + " (https://www.reddit.com" + post.permalink + ")")
            try:
                time.sleep(5)
                os.remove("local-filename.jpg")
            except:
                print("Datei nicht vorhanden")
        else:
            e.close()
            print("already posted")
            Mainbot()


def loop():
    time.sleep(1800)
    print("still running")
    print(datetime.datetime.now())


while True:
    Mainbot()
    loop()
    loop()
    loop()
    loop()
    loop()
    loop()
Related
I just got to know the world of programming, and Python was the first thing I learned. This program can already extract data from a .txt file and send it to an API.
But the thing is, I don't know how to delete the file after the data has been extracted and sent to the API. Here is my code:
from fileinput import close
import os
import requests
from datetime import datetime
import glob
import time

'''List'''
data_send_list = []

'''Path'''
path = "./file"

'''File List'''
file_name = []

URL = 'http://.......'


def main():
    #Main
    print("Main Def" "\n")

    #ScanFile
    data_send_list = scan_files(path)

    #send_API
    for json in data_send_list:
        send_api(URL, json)


def read_text_file(file_path):
    with open(file_path, 'r') as file:
        data_dictionary = {}
        data = file.readlines()
        ...............
        '''UPDATE THE DICTIONARY'''
        data_dictionary.update([(...)(...)])
        return data_dictionary


def scan_files(path):
    list = []
    os.chdir(path)
    for file in glob.glob("*.txt"):
        list.append(read_text_file(file))
    return list


def send_api(url, json,):
    requests_session = requests.session()
    post_api = requests_session.post(url, data=json)
    print("Sending API")
    if(post_api.status_code >= 200 and post_api.status_code < 300):
        print("Successful. Status code: ", post_api.status_code)
        print("\n")
        # i hope that i can delete the file here
    else:
        print("Failed to send to API. Status code: ", post_api.status_code)
        print("\n")
        close()
    return post_api.status_code
I was hoping that if the data can be sent to the API and the output is "status code: 200", the data file will be deleted, while the files whose data was not sent will remain.
There are probably a lot of better ways than my answer, but here is one:
import os
...

def send_api(url, json, path):  # You need to add the parameter path to use it in this function
    requests_session = requests.session()
    post_api = requests_session.post(url, data=json)
    print("Sending API")
    if(post_api.status_code >= 200 and post_api.status_code < 300):
        print("Successful. Status code: ", post_api.status_code)
        print("\n")
        os.remove(path)  # use the os.remove function to remove the file
    else:
        print("Failed to send to API. Status code: ", post_api.status_code)
        print("\n")
        close()
    return post_api.status_code
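Since send_api now needs the path of the file it is sending, the caller has to know which file each payload came from. One way to do that (a rough sketch, reusing the names from the question and assuming scan_files is changed to return file-name/data pairs):

def scan_files(path):
    # return (file_name, data) pairs so the caller knows which file to delete
    results = []
    os.chdir(path)
    for file in glob.glob("*.txt"):
        results.append((file, read_text_file(file)))
    return results

def main():
    print("Main Def" "\n")
    for file_name, data in scan_files(path):
        send_api(URL, data, file_name)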
I'm inserting my code below. I couldn't really do anything on this topic; I'm still learning, so that isn't helping either.
Someone suggested using pickle, JSON, or SQL, but I'm not familiar with any of them.
I'm using Replit, so it automatically restarts the bot.
import praw
import time
import random
import requests
import sys
from sys import exit

print("Starting Magic............")
print(reddit.user.me())
REDDIT_USERNAME = (reddit.user.me())
response = requests.get("https://www.reddit.com/user/{}/about.json".format(REDDIT_USERNAME), headers={'User-agent': 'hiiii its {}'.format(REDDIT_USERNAME)}).json()
if "error" in response:
    if response["error"] == 404:
        print("account {} is shadowbanned. poor bot :( shutting down the script...".format(REDDIT_USERNAME))
        sys.exit()
    else:
        print(response)
else:
    print("{} is not shadowbanned! We think..".format(REDDIT_USERNAME))

title = input("Enter an epic title: ")
title2 = input("Enter an epic title: ")
url = input("Enter a sassy link: ")
url2 = input("Enter a sassy link: ")

print("Reading reddit list")
subredit_list = open("data.txt", "r")
subreddits = subredit_list.read().split(',')
for subreddit in subreddits:
    try:
        print(subreddit)
        reddit.validate_on_submit = True
        submission = reddit.subreddit(subreddit).submit(title, url=url)
        time.sleep(10)
        print("done")
    except Exception as err:
        print("Exception for subreddit {}, {}".format(subreddit, err))

t = random.randint(615, 815)
seconds = "Sleeping for {} seconds before proceeding".format(t)
print(seconds)
time.sleep(t)

print("Reading reddit list")
subredit_list = open("data.txt", "r")
subreddits = subredit_list.read().split(',')
for subreddit in subreddits:
    try:
        print(subreddit)
        reddit.validate_on_submit = True
        submission = reddit.subreddit(subreddit).submit(title2, url=url2)
        time.sleep(10)
        print("done")
    except Exception as err:
        print("Exception for subreddit {}, {}".format(subreddit, err))

t = random.randint(505, 825)
seconds = "Sleeping for {} seconds before proceeding".format(t)
print(seconds)
time.sleep(t)
Today I wrote a Twitter bot that replies to anybody who mentions it with a random image from a folder.
The problem here is that I'm a newbie in Python and I don't know how to make it fully functional. When I started running it, the bot started replying to all the mentions from other users (I'm using an old account a friend gave to me), and that's not precisely what I want; it works, but not as I desire.
The bot replies to all the mentions from the very beginning and won't stop until all of them have been replied to (the bot is now turned off, I don't want to annoy anybody).
How can I make it reply only to the latest mentions instead of the first ones?
Here's the code:
import tweepy
import logging
from config import create_api
import time
import os
import random
from datetime import datetime

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

api = create_api()
imagePath = random.choice(os.listdir("images/"))

while True:
    for tweet in tweepy.Cursor(api.mentions_timeline).items():
        try:
            imagePath = random.choice(os.listdir("images/"))
            tweetId = tweet.user.id
            username = tweet.user.screen_name
            api.update_with_media('images/' + imagePath, "#" + username + " ", in_reply_to_status_id=tweet.id)
            print('Replying to ' + username + 'with ' + imagePath)
        except tweepy.TweepError as e:
            print(e.reason)
        except StopIteration:
            break
    time.sleep(12)
Thanks in advance.
I don't have the ability to test this code currently but this should work.
Instead of iterating over every tweet, it turns the iterator that tweepy.Cursor returns into a list and then just gets the last item in that list.
api = create_api()
imagePath = random.choice(os.listdir("images/"))

while True:
    tweet_iterator = tweepy.Cursor(api.mentions_timeline).items()
    latest_tweet = list(tweet_iterator)[-1]
    try:
        imagePath = random.choice(os.listdir("images/"))
        tweetId = latest_tweet.user.id
        username = latest_tweet.user.screen_name
        api.update_with_media('images/' + imagePath, "#" + username + " ", in_reply_to_status_id=latest_tweet.id)
        print('Replying to ' + username + 'with ' + imagePath)
    except tweepy.TweepError as e:
        print(e.reason)
    except StopIteration:
        break
    time.sleep(12)
You will also want to keep track of what user you last replied to, so you don't just keep spamming the same person over and over.
This isn't the most efficient way of doing it but should be easy enough to understand:
latest_user_id = None

while True:
    # Rest of the code
    try:
        if latest_user_id == latest_tweet.user.id:
            pass  # don't do anything
        else:
            latest_user_id = latest_tweet.user.id
            # the rest of your code
I'm new to Python and I want this code to run only once and stop, not every 30 seconds,
because I want to run multiple scripts like this with different access tokens every 5 seconds from the command line.
When I tried this code, it never jumps to the second one because it's a while True loop:
import requests
import time

api_url = "https://graph.facebook.com/v2.9/"
access_token = "access token"
graph_url = "site url"
post_data = {'id': graph_url, 'scrape': True, 'access_token': access_token}

# Beware of rate limiting if trying to increase frequency.
refresh_rate = 30  # refresh rate in second

while True:
    try:
        resp = requests.post(api_url, data=post_data)
        if resp.status_code == 200:
            contents = resp.json()
            print(contents['title'])
        else:
            error = "Warning: Status Code {}\n{}\n".format(
                resp.status_code, resp.content)
            print(error)
            raise RuntimeWarning(error)
    except Exception as e:
        f = open("open_graph_refresher.log", "a")
        f.write("{} : {}".format(type(e), e))
        f.close()
        print(e)
    time.sleep(refresh_rate)
From what I understood, you're trying to execute this piece of code for multiple access tokens. To make your job simple, put all your access tokens in a list and use the following code. It assumes that you know all your access tokens in advance.
import requests
import time


def scrape_facebook(api_url, access_token, graph_url):
    """Scrapes the given access token"""
    post_data = {'id': graph_url, 'scrape': True, 'access_token': access_token}
    try:
        resp = requests.post(api_url, data=post_data)
        if resp.status_code == 200:
            contents = resp.json()
            print(contents['title'])
        else:
            error = "Warning: Status Code {}\n{}\n".format(
                resp.status_code, resp.content)
            print(error)
            raise RuntimeWarning(error)
    except Exception as e:
        f = open(access_token + "_" + "open_graph_refresher.log", "a")
        f.write("{} : {}".format(type(e), e))
        f.close()
        print(e)


access_token = ['a', 'b', 'c']
graph_url = ['sss', 'xxx', 'ppp']
api_url = "https://graph.facebook.com/v2.9/"

for n in range(len(graph_url)):
    scrape_facebook(api_url, access_token[n], graph_url[n])
    time.sleep(5)
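If you prefer to pair each token with its URL explicitly, the final loop could also be written with zip (assuming both lists have the same length):

# equivalent iteration using zip() instead of indexing
for token, url in zip(access_token, graph_url):
    scrape_facebook(api_url, token, url)
    time.sleep(5)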
I'm writing a program for downloading images from the internet, and I would like to speed it up by making multiple requests at once.
So I wrote the code you can see here on GitHub.
I can request a webpage like this:
from urllib.request import Request, urlopen

def myrequest(url):
    worked = False
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    while not worked:
        try:
            webpage_read = urlopen(req).read()
            worked = True
        except:
            print("failed to connect to \n{}".format(url))
    return(webpage_read)

url = "http://www.mangahere.co/manga/mysterious_girlfriend_x"
webpage_read = myrequest(url).decode("utf-8")
The while loop is there because I definitely want to download every single picture, so I keep trying until it works (nothing can go wrong except urllib.error.HTTPError: HTTP Error 504: Gateway Time-out).
My question is: how do I run that multiple times at once?
My idea is to have a "commander" which would run 5 (or 85) Python scripts, give each a URL, and collect the webpage from each once it is finished, but this is definitely a silly solution :)
EDIT:
I used _thread but it doesn't seem to speed up the program. That should have been the solution, am I doing it wrong? That is my new question.
You can use the link to get to my code on GitHub:
def thrue_thread_download_pics(path, url, ep, name):
    lock.acquire()
    global goal
    goal += 1
    lock.release()
    webpage_read = myrequest("{}/{}.html".format(url, ep))
    url_to_pic = webpage_read.decode("utf-8").split('" onerror="')[0].split('<img src="')[-1]
    pic = myrequest(url_to_pic)
    myfile = open("{}/pics/{}.jpg".format(path, name), "wb")
    myfile.write(pic)
    myfile.close()
    global finished
    finished += 1
and I'm using it here:
for url_ep in urls_eps:
    url, maxep = url_ep.split()
    maxep = int(maxep)
    chap = url.split("/")[-1][2:]
    if "." in chap:
        chap = chap.replace(".", "")
    else:
        chap = "{}0".format(chap)
    for ep in range(1, maxep + 1):
        ted = time.time()
        name = "{}{}".format(chap, "{}{}".format((2 - len(str(ep))) * "0", ep))
        if name in downloaded:
            continue
        _thread.start_new_thread(thrue_thread_download_pics, (path, url, ep, name))

checker = -1
while finished != goal:
    if finished != checker:
        checker = finished
        print("{} of {} downloaded".format(finished, goal))
    time.sleep(0.1)
Requests Futures is built on top of the very popular requests library and uses non-blocking IO:
from requests_futures.sessions import FuturesSession
session = FuturesSession()
# These requests will run at the same time
future_one = session.get('http://httpbin.org/get')
future_two = session.get('http://httpbin.org/get?foo=bar')
# Get the first result
response_one = future_one.result()
print(response_one.status_code)
print(response_one.text)
# Get the second result
response_two = future_two.result()
print(response_two.status_code)
print(response_two.text)
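Applied to your use case of downloading many images, the same pattern scales to a whole list of URLs. A rough, untested sketch (the URL list, worker count, and output file names are placeholders):

from requests_futures.sessions import FuturesSession

session = FuturesSession(max_workers=8)  # number of requests that run concurrently

urls = [
    "http://httpbin.org/image/jpeg",   # placeholder URLs
    "http://httpbin.org/image/png",
]

# start all requests first, then collect the results as they finish
futures = [session.get(u) for u in urls]
for i, future in enumerate(futures):
    response = future.result()
    with open("pic_{}.jpg".format(i), "wb") as f:
        f.write(response.content)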