I have the codes for twython streaming and it is working.
def read_cred(file):
in_handle = open(file,'r')
cred = {}
for ln in in_handle:
data = ln.strip('\r\n').split('=')
if len(data) > 1:
key = data[0].strip(' ').lower()
value = data[1].strip(' ')
cred[key] = value
else:
print "error in parsing credentials file"
return cred
cred = read_cred(sys.argv[1])
class MyStreamer(TwythonStreamer):
def on_success(self, data):
act(data)
def on_error(self, status_code, data):
print status_code, data
stream = MyStreamer(cred['consumer_key'], cred['consumer_secret'],
cred['access_token_key'], cred['access_token_secret'])
keywords = sys.argv[2]
stream.statuses.filter(track=keywords)
However, I want to create a UI in django framework which consist of a 'start' and a 'stop' button. What should I do to stop the twython streaming when I clicked on the button 'stop' ? Can give me some simple examples pls?
As long as the stop button is bound to the rest of the app (I haven't used Django, so I can't provide a simple example) then calling sys.exit() should do the job.
Django might have some other method for terminating a process built into it or some specific examples in its documentation. You should check that to confirm this answer.
You can use the disconnect() function as described in the documentation of Twython - https://twython.readthedocs.org/en/latest/api.html#twython.TwythonStreamer.disconnect
def on_stop(self, status_code, data):
self.disconnect()
def on_start(keywords):
stream = MyStreamer(cred['consumer_key'], cred['consumer_secret'],
cred['access_token_key'], cred['access_token_secret'])
stream.statuses.filter(track=keywords)
Related
I'm trying to use Google Cloud's text-to-speech API. The problem I'm running into is that periodically the API returns a status of "500 Internal Server Error." The correct logic for these errors is usually to just retry the call. Unfortunately, I can't get any of Google Cloud's retry logic to work. As soon as I hit the exception my script exits.
My API function:
async def get_audio_from_google(input_text: str, output_file: str):
"""
Convert the provided text to audio using the Google text-to-speech API.
Args:
input_text: Text to conver to speech.
output_file: File path to write. File extension will be added automatically.
Returns: Writes the audio file to disk. Does not return a result.
"""
client = texttospeech.TextToSpeechAsyncClient()
# Create and configure the Synthesis object.
synthesis_input = texttospeech.SynthesisInput()
synthesis_input.text = input_text
voice_parameters = texttospeech.VoiceSelectionParams()
voice_parameters.language_code = VOICE_ENCODING
voice_parameters.name = VOICE
audio_parameters = texttospeech.AudioConfig()
if AUDIO_FORMAT == AudioFormat.MP3:
audio_parameters.audio_encoding = texttospeech.AudioEncoding.MP3
elif AUDIO_FORMAT == AudioFormat.OPUS:
audio_parameters.audio_encoding = texttospeech.AudioEncoding.OGG_OPUS
else:
print("Invalid audio format specified")
sys.exit(1)
logging.info(f"Synthesizing speech for {output_file}")
# Build our request.
request = texttospeech.SynthesizeSpeechRequest()
request.input = synthesis_input
request.voice = voice_parameters
request.audio_config = audio_parameters
# Get audio.
# Configure when to retry on error.
retry_object = retry.Retry(initial=5, timeout=90)
response = await client.synthesize_speech(request=request, retry=retry_object)
with open(f"{output_file}.{AUDIO_FORMAT}", "wb") as out:
# Write the response to the output file.
out.write(response.audio_content)
logging.info(f'Audio content written to file "{output_file}.{AUDIO_FORMAT}"')
TextToSpeechAsyncClient's synthesize_speech method accepts an instance of Retry, which is part of the Google Core API and can be used as a decorator or passed to some methods. Unfortunately, I can't seem to get the retry logic to work. By default it should retry on any error classed as transient, which includes Internal Server Error (error 500):
if_transient_error = if_exception_type(
exceptions.InternalServerError,
exceptions.TooManyRequests,
exceptions.ServiceUnavailable,
requests.exceptions.ConnectionError,
requests.exceptions.ChunkedEncodingError,
auth_exceptions.TransportError,
I've tried both passing retry to synthesize_speech and using it as a decorator for get_audio_from_google. In either case, as soon as my script gets an error response from the server, it exits.
How I'm calling get_audio_from_google:
def process_audio(text: List[str]):
"""
Process text asynchronously in segments and output a bunch of audio files ready for stitching.
Args:
text (List[str]): List of text snippets to process.
"""
async def gather_with_concurrency(max_tasks: int, *coroutines):
"""
Run tasks in parallel with a limit.
https://stackoverflow.com/questions/48483348/how-to-limit-concurrency-with-python-asyncio
Args:
max_tasks: Maximum number of tasks to run at once.
*coroutines: All async tasks that should be run.
"""
semaphore = asyncio.Semaphore(max_tasks)
async def sem_coro(coro):
async with semaphore:
return await coro
return await asyncio.gather(*(sem_coro(c) for c in coroutines))
async def main():
snippet_counter = 1
subtasks = []
for text_snippet in text:
snippet_filename = str(snippet_counter)
snippet_counter += 1
subtasks.append(
get_audio_from_google(input_text=text_snippet, output_file=snippet_filename)
)
await gather_with_concurrency(2, *subtasks)
logging.info("Starting audio processing tasks…")
# Begin execution.
asyncio.run(main())
The issue is that the third function never seems to respond.
I haven't been able to find a reason why this happens in the telegram documentation.
Please let me know if you have this issue or seen it and know the solution.
Even a post that references an issue like this would work.
Thank you so much for the assistance.
from email import message
import os
import re
import html
import json
import telebot
import requests
import http.client
from pytube import *
from dotenv import load_dotenv
load_dotenv()
# Creating hiding, and using API Keys
API_KEY = os.getenv("API_KEY")
RAPID_KEY = os.getenv("RAPID_API")
bot = telebot.TeleBot(API_KEY)
#bot.message_handler(commands="start")
# Creating a help message for guidance on how to use bot.
def help(message):
# Trying to send help message, if unable to send, throw an error message for the user.
try:
bot.send_message(message.chat.id, "Use \"Youtube\" and the video name to search for a video.\n")
except:
bot.send_message(message.chat.id, "There was an error fetching help, the bot may be offline.\n")
# Checking data and seeing if the word "YouTube" was used in order to start the search
def data_validation(message):
query = message.text.split()
if("youtube" not in query[0].lower()): # Set flag false if regular text
return False
else:
return True
#bot.message_handler(func=data_validation)
# Searching for youtube videos
# using RAPID API
def search(message):
query = message.text.split()
# Check if data is valid, and change variable to be lowercase for easy use.
if(data_validation(message) == True and query[0].lower() == "youtube"):
try:
if(data_validation(message) == True and query[1].lower() != "-d"):
# Removing the word "YouTube" and sending the results to the YouTube search engine.
for item in query[:]:
if(item.lower() == "youtube"):
query.remove(item)
search_query = ' '.join(query)
else:
pass #If it's not term we're looking to convert, ignore it.
# RAPID API for Youtube
try:
url = "https://youtube-search-results.p.rapidapi.com/youtube-search/"
querystring = {"q":search_query}
headers = {
"X-RapidAPI-Key": RAPID_KEY,
"X-RapidAPI-Host": "youtube-search-results.p.rapidapi.com"
}
response = requests.request("GET", url, headers=headers, params=querystring) # Grabbing response information from URL
request = json.loads(response.text) # Parsing json string for python use
# Testing to see if the RAPID API service responds and is online.
if(response.status_code == 503):
# If the service is not online, let the user know.
bot.send_message(message.chat.id, f"The RAPID API service appears to be offline try back later.\n")
if(response.status_code == 429):
# If the service has reached max quota for the day, let the user know.
bot.send_message(message.chat.id, f"Max quota reached, try back in 24 hours.\n")
# Grabbing first link from json text and sending direct url and title.
first_link = str((request["items"][0]["url"]))
bot.send_message(message.chat.id, f"{first_link}\n") # Sending first link that was queried.
# If there are no results found for the requested video, sending an error message to alert the user.
except:
bot.send_message(message.chat.id, "Unable to load video.\n")
except:
pass #ignoring if not the phrase we're looking for.
def test(message):
string = message.text.split()
print(string)
if(string[0] == "test" and data_validation(message) == True):
print("This is a test and i should be printed")
bot.send_message(message.chat.id, "Test message")
# Stay alive function for bot pinging / communication
bot.infinity_polling(1440)
The first problem in your code is your first line
from email import message
You import the message from email and also pass a parameter to the data_validation function with the same name, then return False in the data_validation function. If you return false, the function never will be executed.
first give an alias to first line you imported
Try This
from email import message as msg
import os
import re
import html
import json
import telebot
import requests
import http.client
from pytube import *
from dotenv import load_dotenv
load_dotenv()
# Creating hiding, and using API Keys
API_KEY = os.getenv("API_KEY")
RAPID_KEY = os.getenv("RAPID_API")
bot = telebot.TeleBot(API_KEY)
# Creating a help message for guidance on how to use bot.
#bot.message_handler(commands=["start"])
def help(message):
# Trying to send help message, if unable to send, throw an error message for the user.
try:
bot.send_message(message.chat.id, "Use \"Youtube\" and the video name to search for a video.\n")
except:
bot.send_message(message.chat.id, "There was an error fetching help, the bot may be offline.\n")
# Checking data and seeing if the word "YouTube" was used in order to start the search
def data_validation(message):
query = message.text.split()
print(query)
if("youtube" not in query[0].lower()): # Set flag false if regular text
return False # if you return false, the function never will be executed
else:
return True
# Searching for youtube videos
# using RAPID API
#bot.message_handler(func=data_validation)
def search(message):
query = message.text.split()
print(query) # if function executed you see the query result
# Check if data is valid, and change variable to be lowercase for easy use.
if(data_validation(message) == True and query[0].lower() == "youtube"):
try:
if(data_validation(message) == True and query[1].lower() != "-d"):
# Removing the word "YouTube" and sending the results to the YouTube search engine.
for item in query[:]:
if(item.lower() == "youtube"):
query.remove(item)
search_query = ' '.join(query)
else:
pass #If it's not term we're looking to convert, ignore it.
# RAPID API for Youtube
try:
url = "https://youtube-search-results.p.rapidapi.com/youtube-search/"
querystring = {"q":search_query}
headers = {
"X-RapidAPI-Key": RAPID_KEY,
"X-RapidAPI-Host": "youtube-search-results.p.rapidapi.com"
}
response = requests.request("GET", url, headers=headers, params=querystring) # Grabbing response information from URL
request = json.loads(response.text) # Parsing json string for python use
# Testing to see if the RAPID API service responds and is online.
if(response.status_code == 503):
# If the service is not online, let the user know.
bot.send_message(message.chat.id, f"The RAPID API service appears to be offline try back later.\n")
if(response.status_code == 429):
# If the service has reached max quota for the day, let the user know.
bot.send_message(message.chat.id, f"Max quota reached, try back in 24 hours.\n")
# Grabbing first link from json text and sending direct url and title.
first_link = str((request["items"][0]["url"]))
bot.send_message(message.chat.id, f"{first_link}\n") # Sending first link that was queried.
# If there are no results found for the requested video, sending an error message to alert the user.
except:
bot.send_message(message.chat.id, "Unable to load video.\n")
except:
pass #ignoring if not the phrase we're looking for.
def test(message):
string = message.text.split()
print(string)
if(string[0] == "test" and data_validation(message) == True):
print("This is a test and i should be printed")
bot.send_message(message.chat.id, "Test message")
# Stay alive function for bot pinging / communication
bot.infinity_polling(1440)
I found that using "if name == 'main':" and keeping all the functions in "main():" as a function handler everything ran smoothly.
I'm still trying to figure out why this works.
#!/usr/bin/env python
# twitterbots/bots/favretweet.py
import tweepy
import logging
from config import create_api
import seacret
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
#stream = tweepy.Stream(seacret.KEY, seacret.SECRET, seacret.TOKEN, seacret.TOKEN_SECRET)
class FavRetweetListener(tweepy.Stream):
def __init__(self, api):
self.api = api
self.user = api.get_user(screen_name='MyGasAndEnergy1')
def on_status(self, tweet):
logger.info(f"Prosessing tweet id {tweet.id}")
if tweet.in_reply_to_status_id is not None or tweet.user.id == self.user.user_id:
return
if not tweet.favorite:
try:
tweet.favorite()
except Exception as e:
logger.error("Error on Fav", exc_info=True)
if not tweet.retweeted:
try:
tweet.retweet()
except Exception as e:
logger.error("Error on vav and retweet", exc_info=True)
def on_error(self, status):
logger.error(status)
def main(keywords):
api = create_api()
tweets_listener = FavRetweetListener(api)
#new way to auth
stream = tweepy.Stream(seacret.KEY, seacret.SECRET, seacret.TOKEN, seacret.TOKEN_SECRET)
#old way to auto + important tweets_listener for actions
stream = tweepy.Stream(api.auth, tweets_listener)
stream.filter(track=keywords, languages=["en"])
if __name__ == "__main__":
main(["Python", "Tweepy"])
I have older code for editing for my use. But this part I can not figure, because of my noobines. Code is suppose to fav and retweet in twitter if it founds suitable keyword.
New code needs:
stream = tweepy.Stream(seacret.KEY, seacret.SECRET, seacret.TOKEN, seacret.TOKEN_SECRET)
Old code needs:
tweets_listener = FavRetweetListener(api)
stream = tweepy.Stream(api.auth, tweets_listener)
But new tweepy don't work with older api.auth method but want all secret tokens to be in tweepy.Stream() and that mean that I can not launch rest of my code via tweets_listener becauce it wont accept anything more.
How can I continue. I haven't found solution for this after googling or/and can not ask proper questions to move on with this problem.
Tweepy is python module/packet for working twitter-things. This script is originally from realpython.com. Problem is that I don't want to downgrade tweepy.
So I need include FavRetweetListener, but I don't have knowledge how I have to refactor code.
I switched to tweepy.Cursor and get it working. Thanks to all. Better question next time.
https://docs.tweepy.org/en/stable/v1_pagination.html#tweepy.Cursor
That's my code. Every thing works well, but when the Twitter Stream should start there comes the following error: Stream encountered HTTP Error: 406
With the normal stream it works well, but with the asynchronous stream it doesn't. Where is my mistake?
import discord
import tweepy
import tweepy.asynchronous
class Stream(tweepy.asynchronous.AsyncStream):
async def on_connect(self):
print("connected")
async def on_status(self, status):
print(status.text)
async def on_exception(self, exception):
print(exception)
print("!exception")
"""async def on_request_error(self, status_code):
print(status_code)"""
#If An error occurs then the programm should be restartet
"""await restart(discord_client=client, twitter_stream = self)"""
async def on_closed(self, resp):
print(resp)
class Client(discord.Client):
async def on_ready(self):
#creating stream
stream = Stream(keys + tokens)
#creating api
auth = tweepy.OAuthHandler(keys)
auth.set_access_token(tokens)
api = tweepy.API(auth)
#getting specific users which the stream should follow
follow_list = [screen_name]
follower_ids = []
for _ in follow_list:
follow = int(api.get_user(screen_name = _).id)
print(follow)
follower_ids.append(follow)
print(str(follower_ids))
#starting Stream
stream.filter(follow = follow_list)
print("start")
client = Client()
client.run(token)
This is likely because you're passing invalid IDs for AsyncStream.filter's follow parameter.
follow_list seems to be a list with a single screen name or a list of screen names, not a list of user IDs. It seems like you might have intended to use follower_ids instead.
A 406 Not Acceptable HTTP Error means:
At least one request parameter is invalid. For example, the filter endpoint returns this status if:
The track keyword is too long or too short.
An invalid bounding box is specified.
Neither the track nor follow parameter are specified.
The follow user ID is not valid.
https://developer.twitter.com/en/docs/twitter-api/v1/tweets/filter-realtime/guides/connecting
What's the best way to use the Twitter Stream API with Python to collect tweets in a large area?
I'm interested in geolocation, particularly the nationwide collection of tweets in North America. I'm currently using Python and Tweepy to dump tweets from the Twitter streaming API into a MongoDB database.
I'm currently using the API's location-filter to pull tweets within a boundary box, and then I further filter to only store tweets with coordinates. I've found that if my boundary box is large enough, I run into a Python connection error:
raise ProtocolError('Connection broken: %r' % e, e)
requests.packages.urllib3.exceptions.ProtocolError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))
I've made the bounding box smaller (I've successfully tried NYC and NYC + New England), but it seemms like the error returns with a large enough bounding box. I've also tried threading with the intention of running multiple StreamListeners concurrently, but I don't think the API allows this (I'm getting 420 errors), or at least not in the manner that I'm attempting.
I'm using Tweepy to set up a custom StreamListener class:
class MyListener(StreamListener):
"""Custom StreamListener for streaming data."""
# def __init__(self):
def on_data(self, data):
try:
db = pymongo.MongoClient(config.db_uri).twitter
col = db.tweets
decoded_json = json.loads(data)
geo = str(decoded_json['coordinates'])
user = decoded_json['user']['screen_name']
if geo != "None":
col.insert(decoded_json)
print("Geolocated tweet saved from user %s" % user)
else: print("No geo data from user %s" % user)
return True
except BaseException as e:
print("Error on_data: %s" % str(e))
time.sleep(5)
return True
def on_error(self, status):
print(status)
return True
This is what my Thread class looks like:
class myThread(threading.Thread):
def __init__(self, threadID, name, streamFilter):
threading.Thread.__init__(self)
self.threadID = threadID
self.name = name
self.streamFilter = streamFilter
def run(self):
print("Starting " + self.name)
#twitter_stream.filter(locations=self.streamFilter)
Stream(auth, MyListener()).filter(locations=self.streamFilter)
And main:
if __name__ == '__main__':
auth = OAuthHandler(config.consumer_key, config.consumer_secret)
auth.set_access_token(config.access_token, config.access_secret)
api = tweepy.API(auth)
twitter_stream = Stream(auth, MyListener())
# Bounding boxes:
northeast = [-78.44,40.88,-66.97,47.64]
texas = [-107.31,25.68,-93.25,36.7]
california = [-124.63,32.44,-113.47,42.2]
northeastThread = myThread(1,"ne-thread", northeast)
texasThread = myThread(2,"texas-thread", texas)
caliThread = myThread(3,"cali-thread", california)
northeastThread.start()
time.sleep(5)
texasThread.start()
time.sleep(10)
caliThread.start()
There is nothing bad or unusual about getting a ProtocolError. Connections do break from time to time. You should catch this error in your code and simply restart the stream. All will be good.
BTW, I noticed you are interrogating the geo field which has been deprecated. The field you want is coordinates. You might also find places useful.
(The Twitter API docs say multiple streaming connections are not allowed.)
It seems twitter allocates one block of tweets when you try to search a keyword in a big geo (let's say country or city). I think this can be overcome by running multiple streams of program concurrently, but as separate programs.