Python - Tweepy, retrieving the created_at

I'm trying to retrieve tweets and the dates they were created. This is what my code looks like so far:
import tweepy
import json
import urllib
import sys
import datetime
from tweepy import OAuthHandler

user = "billgates"
count = 1

def twitter_fetch(screen_name=user, maxnumtweets=count):
    consumer_token = 'INSERT CONSUMER TOKEN'
    consumer_secret = 'INSERT CONSUMER SECRET'
    access_token = 'INSERT ACCESS TOKEN'
    access_secret = 'INSERT ACCESS SECRET'

    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(count):
        print status.text + '\n'

if __name__ == '__main__':
    twitter_fetch(user, count)
I know that I presumably need to access the date via "created_at", but I'm not exactly sure where to put it in order to retrieve it. How can I do this?

As Wander Nauta said, changing the lines:
for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(count):
    print status.text + '\n'
to:
for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(count):
    print status.text + ' ' + str(status.created_at) + '\n'
should print out the tweet along with the time and date of the creation of the tweet.

I am not sure whether this is exactly what you are looking for, but this code should work:
import tweepy
import json
import urllib
import sys
import datetime
from tweepy import OAuthHandler

user = "billgates"
count = 1

def twitter_fetch(screen_name=user, maxnumtweets=count):
    consumer_token = 'INSERT CONSUMER TOKEN'
    consumer_secret = 'INSERT CONSUMER SECRET'
    access_token = 'INSERT ACCESS TOKEN'
    access_secret = 'INSERT ACCESS SECRET'

    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(count):
        print status.text + '\n'
        print status.created_at

if __name__ == '__main__':
    twitter_fetch(user, count)
I just added the line "print status.created_at" to your code, which prints the date and time each tweet was created (the type is datetime.datetime).
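If you want the timestamp in a particular format rather than the default datetime representation, strftime works on the created_at value. A small sketch (the format string below is only an example):

for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(count):
    print status.text
    # created_at is a datetime.datetime, so it supports strftime
    print status.created_at.strftime('%Y-%m-%d %H:%M:%S')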

Related

Twitter Scraping Repeated Execution Code(python)

This is a Twitter scraping script that extracts tweets containing well-known keywords.
I want to repeat the entire code below every 12 hours (or every 12 hours with a 10-minute break). Can you give me advice on how to repeat it?
import tweepy
import time
import os
import json
import simplejson

search_term = 'word1'
search_term2 = 'word2'
search_term3 = 'word3'

lat = "xxxx"
lon = "xxxx"
radius = "xxxx"
location = "%s,%s,%s" % (lat, lon, radius)

API_key = "xxxx"
API_secret = "xxxx"
Access_token = "xxxx"
Access_token_secret = "xxxx"

auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(Access_token, Access_token_secret)
api = tweepy.API(auth)

# note: the original format string had only two placeholders,
# so search_term3 was silently dropped; a third is added here
c = tweepy.Cursor(api.search,
                  q="{}+OR+{}+OR+{}".format(search_term, search_term2, search_term3),
                  rpp=1000,
                  geocode=location,
                  include_entities=True)

data = {}
i = 1
for tweet in c.items():
    data['text'] = tweet.text
    print(i, ":", data)
    i += 1
    time.sleep(1)

wfile = open(os.getcwd() + "/workk2.txt", mode='w')
data = {}
i = 0
for tweet in c.items():
    data['text'] = tweet.text
    wfile.write(data['text'] + '\n')
    i += 1
wfile.close()
You could set up a cron job that executes your script every 12 hours. To do so, save your script with a .py extension and make it executable, then add it to your crontab:
# minute hour day-of-month month day-of-week command
0 */12 * * * /usr/bin/python yourscript.py
For more detail, have a look at this question. Alternatively, there are packages in Python (e.g. APScheduler) that can help you achieve this. In APScheduler you can define a job like this:
from apscheduler.schedulers.blocking import BlockingScheduler

sched = BlockingScheduler()

@sched.scheduled_job('interval', hours=12)
def timed_job():
    print('This job is run every 12 hours.')

sched.start()
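If you'd rather not depend on cron or APScheduler, the simplest approach is to wrap your scraping code in a function and loop with time.sleep. A minimal sketch (scrape_and_save is a hypothetical name for the code from the question), which also makes the "12 hours + 10 minutes" variant trivial:

import time

def scrape_and_save():
    # the Cursor/search code from the question goes here
    pass

while True:
    scrape_and_save()
    time.sleep(12 * 3600 + 10 * 60)  # 12 hours plus a 10-minute break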

Every 1 minute, generate a report based only on the data tweeted in last 5 minutes

My code gives continuous data, but I want to filter it down to the last five minutes. Additionally, I want to report it every minute. What do I need to do for that?
try:
    import json
except ImportError:
    import simplejson as json

from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

ACCESS_TOKEN = 'secret'
ACCESS_SECRET = 'secret'
CONSUMER_KEY = 'secret'
CONSUMER_SECRET = 'secret'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
twitter_stream = TwitterStream(auth=oauth)
iterator = twitter_stream.statuses.filter(track="car", language="en")

hashtags = []  # missing in the original, so hashtags.append raised NameError
for tweet in iterator:
    try:
        if 'text' in tweet:
            print tweet['user']['name']
            print tweet['user']['statuses_count']
            # print '\n'
            for hashtag in tweet['entities']['hashtags']:
                hashtags.append(hashtag['text'])
            print hashtags
    except:
        continue
Thanks in advance.
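One way to do this (a sketch, not from the original thread): record an arrival timestamp with each tweet, prune anything older than five minutes, and print a report once a minute. This reworks the stream loop above around a deque:

import time
from collections import deque

WINDOW = 5 * 60        # keep five minutes of tweets
REPORT_EVERY = 60      # report once a minute

recent = deque()       # (arrival_time, tweet) pairs, oldest first
last_report = time.time()

for tweet in iterator:
    if 'text' in tweet:
        recent.append((time.time(), tweet))
    # drop tweets that have fallen out of the five-minute window
    cutoff = time.time() - WINDOW
    while recent and recent[0][0] < cutoff:
        recent.popleft()
    if time.time() - last_report >= REPORT_EVERY:
        print '%d tweets in the last 5 minutes' % len(recent)
        last_report = time.time()

Note that reports here fire as tweets arrive; if the stream can go quiet for long stretches, run the reporting in a separate timer thread instead.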

Reddit to Twitter bot having issues with key-dictionary calls

So I'm following a tutorial for a Reddit-to-Twitter bot that's coded in Python using PRAW, and I've hit an error.
Running this code in the command console gives me the error on line 74:
import praw
import json
import requests
import tweepy
import time

access_token = 'secret'
access_token_secret = ' secret'
consumer_key = 'secret'
consumer_secret = 'secret'

def strip_title(title):
    if len(title) < 94:
        return title
    else:
        return title[:93] + "..."

def tweet_creator(subreddit_info):
    post_dict = {}
    post_ids = []
    print "[bot] Getting posts from Reddit"
    for submission in subreddit_info.get_hot(limit=20):
        post_dict[strip_title(submission.title)] = submission.url
        post_ids.append(submission.id)
    print "[bot] Generating short link using goo.gl"
    mini_post_dict = {}
    for post in post_dict:
        post_title = post
        post_link = post_dict[post]
        short_link = shorten(post_link)
        mini_post_dict[post_title] = short_link
    return mini_post_dict, post_ids

def setup_connection_reddit(subreddit):
    print "[bot] setting up connection with Reddit"
    r = praw.Reddit('yasoob_python reddit twitter bot '
                    'monitoring %s' % (subreddit))
    subreddit = r.get_subreddit(subreddit)
    return subreddit

def shorten(url):
    headers = {'content-type': 'application/json'}
    payload = {"longUrl": url}
    url = "https://www.googleapis.com/urlshortener/v1/url"
    r = requests.post(url, data=json.dumps(payload), headers=headers)
    link = json.loads(r.text)
    return link

def duplicate_check(id):
    found = 0
    with open('posted_posts.txt', 'r') as file:
        for line in file:
            if id in line:
                found = 1
    return found

def add_id_to_file(id):
    with open('posted_posts.txt', 'a') as file:
        file.write(str(id) + "\n")

def main():
    subreddit = setup_connection_reddit('showerthoughts')
    post_dict, post_ids = tweet_creator(subreddit)
    tweeter(post_dict, post_ids)

def tweeter(post_dict, post_ids):
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    for post, post_id in zip(post_dict, post_ids):
        found = duplicate_check(post_id)
        if found == 0:
            print "[bot] Posting this link on twitter"
            print post + " " + post_dict[post] + " #Python #reddit #bot"
            api.update_status(post + " " + post_dict[post] + " #Python #reddit #bot")
            add_id_to_file(post_id)
            time.sleep(30)
        else:
            print "[bot] Already posted"

if __name__ == '__main__':
    main()
Error:
print post+" "+post_dict[post]+"#python #reddit #bot"
TypeError: coercing to Unicode: need string or buffer, dict found
My understanding of the code and the error is that it needs a string to send but is somehow getting an entire dictionary. I thought that by passing post as the key into post_dict it would get the particular post for the bot to use, but instead it's fetching a dictionary!
Lines 74 and 75 both call post_dict[post], and neither is getting the dictionary's value when given the post key.
Try printing post and post_dict before that concatenation in the tweeter function's for loop. That should show you what those structures look like and make the solution evident.
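If you do, you will likely find (this is an inference from the traceback, not something stated in the original answer) that post_dict[post] is a dict: shorten() returns the entire parsed JSON response rather than the short link. The goo.gl API returned the shortened link under the "id" key, so returning just that string should fix the concatenation:

def shorten(url):
    headers = {'content-type': 'application/json'}
    payload = {"longUrl": url}
    api_url = "https://www.googleapis.com/urlshortener/v1/url"
    r = requests.post(api_url, data=json.dumps(payload), headers=headers)
    # json.loads(r.text) is a dict like {"kind": ..., "id": "<short url>", ...};
    # return just the short URL string so post_dict values are strings
    return json.loads(r.text)['id']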

Retrieving Twitter data

I am using the code below to retrieve Twitter hashtag data using tweepy, but it only retrieves the tweet message and the time created. I need to retrieve the metadata for that hashtag as well. Any help?
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import datetime

# setting up the keys
consumer_key = '-------'
consumer_secret = '----------'
access_token = '--------'
access_secret = '-----------'

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

def date_range(start, end):
    current = start
    while (end - current).days >= 0:
        yield current
        current = current + datetime.timedelta(seconds=1)

class TweetListener(StreamListener):
    def on_status(self, status):
        # api = tweepy.API(auth_handler=auth)
        # status.created_at += timedelta(hours=900)
        startDate = datetime.datetime(2014, 3, 11)
        stopDate = datetime.datetime(2014, 3, 13)
        for date in date_range(startDate, stopDate):
            status.created_at = date
            print "tweet " + str(status.created_at) + "\n"
            print status.text + "\n"

stream = Stream(auth, TweetListener(), secure=True)
t = u"#سوريا"
stream.filter(track=[t])
This might help: a full specification of the status object.
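For example, here is a sketch of pulling more metadata inside on_status. The attribute names mirror the fields of the Twitter status payload that tweepy parses; _json (the raw parsed payload) is an assumption worth verifying against your tweepy version:

class TweetListener(StreamListener):
    def on_status(self, status):
        print status.id_str
        print status.created_at
        print status.user.screen_name
        print status.retweet_count
        for hashtag in status.entities['hashtags']:
            print hashtag['text']
        # dump everything tweepy parsed, if you want the full metadata
        print json.dumps(status._json, indent=2)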

Using Python and OAuth2 with Twitter streaming API

The Twitter v1 API is now defunct, so I've been trying to use the Search and Streaming APIs to collate hashtag information. The Search API is rate-limited, so if there are a lot of entries on a hashtag you will probably miss some. Streaming seemed like the way to go.
Using OAuth2, here is my (anonymized) code:
import oauth2 as oauth
import json
consumer_key = "<consumer key from twitter developer site>"
consumer_secret = "<consumer secret>"
oauth_token = "<access token>"
oauth_token_secret = "<access token secret>"
consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret)
access_token = oauth.Token(key=oauth_token, secret=oauth_token_secret)
client = oauth.Client(consumer, access_token)
terms = json.dumps({'track' : 'twitter'})
stream_endpoint = "https://stream.twitter.com/1.1/statuses/filter.json"
response, data = client.request(stream_endpoint,"POST", body=terms, headers={'Content-Type':'application/json'})
The issue I run into is that this always returns the following message:
>>>'No filter parameters found. Expect at least one parameter: follow track locations\r\n'
I think your error is because you are sending JSON data in
terms = json.dumps({'track' : 'twitter'})
You should write it like this instead:
terms = 'track=twitter'
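To spell that out (a sketch building on the client from the question): the streaming endpoint expects form-encoded parameters rather than a JSON body, so encode the terms and send the matching Content-Type:

import urllib

# form-encode the filter parameters instead of JSON-encoding them
terms = urllib.urlencode({'track': 'twitter'})
response, data = client.request(
    stream_endpoint, "POST", body=terms,
    headers={'Content-Type': 'application/x-www-form-urlencoded'})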
For reference, here is a fuller oauth2 example against the REST API (this snippet appears to come from a web-framework request handler, which is where request and the trailing return come from):
# imports the snippet needs (Python 2)
import sys
import urllib2
import simplejson
import oauth2 as oauth
from urlparse import parse_qsl

USER = request.params.get('username', '00000')
LIMIT = request.params.get('limit', '50')
REQUEST_TOKEN_URL = 'https://api.twitter.com/oauth/request_token'

consumer_key = '424245wfdsfa4'
consumer_secret = 'afar234252523adsasd'
if consumer_key is None or consumer_secret is None:
    print 'you need consumer_key & consumer_secret key'
    sys.exit(1)

signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()
oauth_consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret)
oauth_client = oauth.Client(oauth_consumer)

response, content = oauth_client.request(REQUEST_TOKEN_URL, 'POST')
if response['status'] == '200':
    request_token = dict(parse_qsl(content))
else:
    print 'Invalid response from Twitter requesting token.........: %s' % response['status']

endpoint = 'https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=' + USER + '&count=' + LIMIT
response, content = oauth_client.request(endpoint, 'GET')
url = response['content-location']
f = urllib2.urlopen(url)
response = f.read()
return simplejson.loads(response)
