Twitter Scraping Repeated Execution Code(python) - python

This is a Twitter scraping code that extracts tweets which contain famous keywords.
I want to repeat the entire code below every 12 hours (or every 12 hours plus a 10-minute break). Can you give me advice on how to make it repeat?
import tweepy
import time
import os
import json
import simplejson
# Keywords to look for; the query below joins them with OR.
search_term = 'word1'
search_term2 = 'word2'
search_term3 = 'word3'

# Search area (placeholders), joined as "lat,lon,radius" for the geocode argument.
lat = "xxxx"
lon = "xxxx"
radius = "xxxx"
location = "%s,%s,%s" % (lat, lon, radius)

# Twitter API credentials (placeholders).
API_key = "xxxx"
API_secret = "xxxx"
Access_token = "xxxx"
Access_token_secret = "xxxx"

auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(Access_token, Access_token_secret)
api = tweepy.API(auth)

# One placeholder per keyword: with only two placeholders, str.format silently
# ignored the third argument and search_term3 never reached the query.
# NOTE(review): rpp is kept from the original; confirm the installed tweepy
# version still accepts it.
c = tweepy.Cursor(api.search,
                  q="{}+OR+{}+OR+{}".format(search_term, search_term2, search_term3),
                  rpp=1000,
                  geocode=location,
                  include_entities=True)

# Print and save every tweet in a single pass. The original opened the file
# only after a first print-only loop had finished, and then relied on a second
# c.items() pass to yield the tweets again.
# NOTE(review): a tweepy Cursor appears to share one page iterator between
# items() calls, so that second pass may have yielded nothing -- confirm.
data = {}
with open(os.getcwd() + "/workk2.txt", mode='w') as wfile:
    for i, tweet in enumerate(c.items(), start=1):
        data['text'] = tweet.text
        print(i, ":", data)
        wfile.write(data['text'] + '\n')
        time.sleep(1)  # pause between tweets, as in the original print loop

You could set a Cron job that executes your script every 12 hours. To do so you should save your script with .py extension and make it executable. Then add it to your crontab:
# Standard five-field crontab entry: minute 0 of every 12th hour (00:00 and 12:00).
# The previous line used Quartz-style syntax (a seconds field and "?"), which cron does not accept.
# Use the absolute path of the script; cron does not start in the script's directory.
0 */12 * * * /usr/bin/python /path/to/yourscript.py
For more detail have a look at this question. Alternatively there are packages in python (e.g. APScheduler) that help you achieve this. In APScheduler you can define a job like this:
from apscheduler.schedulers.blocking import BlockingScheduler

sched = BlockingScheduler()


# The decorator registers timed_job with the scheduler. It had been turned into
# a comment ("#" instead of "@"), so no job was ever scheduled.
@sched.scheduled_job('interval', hours=12)
def timed_job():
    """Job body; put the scraping code (or a call to it) here."""
    print('This job is run every 12 hours.')


# Optional: call sched.configure(...) here with your own scheduler options.
# The original passed an undefined placeholder name (options_from_ini_file),
# which raises NameError before the scheduler can start.
sched.start()

Related

(python)Determine when to end and restart(loop) twitter scraping

Determine when to end and restart(loop) twitter scraping
Hello. This is Twitter scraping code; it collects tweets that contain certain keywords.
What I want to do is end the crawl after 10 hours and then restart it, adding the new results to the existing output.
I would like advice on how to change the code to do this.
import tweepy
import time
import os
import json
import simplejson
# Keywords to look for; the query below joins them with OR.
search_term = 'word1'
search_term2 = 'word2'
search_term3 = 'word3'

# Search area (placeholders), joined as "lat,lon,radius" for the geocode argument.
lat = "xxxx"
lon = "xxxx"
radius = "xxxx"
location = "%s,%s,%s" % (lat, lon, radius)

# Twitter API credentials (placeholders).
API_key = "xxxx"
API_secret = "xxxx"
Access_token = "xxxx"
Access_token_secret = "xxxx"

auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(Access_token, Access_token_secret)
api = tweepy.API(auth)

# One placeholder per keyword: with only two placeholders, str.format silently
# ignored the third argument and search_term3 never reached the query.
# NOTE(review): rpp is kept from the original; confirm the installed tweepy
# version still accepts it.
c = tweepy.Cursor(api.search,
                  q="{}+OR+{}+OR+{}".format(search_term, search_term2, search_term3),
                  rpp=1000,
                  geocode=location,
                  include_entities=True)

# Print and save every tweet in a single pass. The original opened the file
# only after a first print-only loop had finished, and then relied on a second
# c.items() pass to yield the tweets again.
# NOTE(review): a tweepy Cursor appears to share one page iterator between
# items() calls, so that second pass may have yielded nothing -- confirm.
data = {}
with open(os.getcwd() + "/workk2.txt", mode='w') as wfile:
    for i, tweet in enumerate(c.items(), start=1):
        data['text'] = tweet.text
        print(i, ":", data)
        wfile.write(data['text'] + '\n')
        time.sleep(1)  # pause between tweets, as in the original print loop

python_" .txt" files can not be created

" .txt " files can not be created.
The code runs, but the file is not created.
I've been advised to use "pickle",
but I don't know how to use "pickle".
How can I change this code so that it saves the output to a file?
Also, I would like to number the entries, in order, when saving them.
import tweepy
import time
import os
import json
import simplejson
# Keywords to look for; the query below joins them with OR.
search_term = 'word1'
search_term2 = 'word2'
search_term3 = 'word3'

# Search area (placeholders), joined as "lat,lon,radius" for the geocode argument.
lat = "xxxx"
lon = "xxxx"
radius = "xxxx"
location = "%s,%s,%s" % (lat, lon, radius)

# Twitter API credentials (placeholders).
API_key = "xxxx"
API_secret = "xxxx"
Access_token = "xxxx"
Access_token_secret = "xxxx"

auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(Access_token, Access_token_secret)
api = tweepy.API(auth)

# One placeholder per keyword: with only two placeholders, str.format silently
# ignored the third argument and search_term3 never reached the query.
# NOTE(review): rpp is kept from the original; confirm the installed tweepy
# version still accepts it.
c = tweepy.Cursor(api.search,
                  q="{}+OR+{}+OR+{}".format(search_term, search_term2, search_term3),
                  rpp=1000,
                  geocode=location,
                  include_entities=True)

# Print and save every tweet in a single pass. The original opened the file
# only after a first print-only loop had finished, and then relied on a second
# c.items() pass to yield the tweets again.
# NOTE(review): a tweepy Cursor appears to share one page iterator between
# items() calls, so that second pass may have yielded nothing -- confirm.
data = {}
with open(os.getcwd() + "/workk2.txt", mode='w') as wfile:
    for i, tweet in enumerate(c.items(), start=1):
        data['text'] = tweet.text
        print(i, ":", data)
        wfile.write(data['text'] + '\n')
        time.sleep(1)  # pause between tweets, as in the original print loop

Python - Tweepy, retrieving the created_at

I'm trying to retrieve tweets and the dates as to when they were created. This is what my code looks like so far:
import tweepy
import json
import urllib
import sys
import datetime
from tweepy import OAuthHandler
user = "billgates"
count = 1


def twitter_fetch(screen_name=user, maxnumtweets=count):
    """Print the text of tweets from a user's timeline.

    Args:
        screen_name: Account whose timeline is read.
        maxnumtweets: Maximum number of tweets to print.
    """
    consumer_token = 'INSERT CONSUMER TOKEN'
    consumer_secret = 'INSERT CONSUMER SECRET'
    access_token = 'INSERT ACCESS TOKEN'
    access_secret = 'INSERT ACCESS SECRET'

    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    # Limit by the maxnumtweets argument; the global `count` was used here
    # before, so the parameter had no effect.
    for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(maxnumtweets):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(status.text + '\n')


if __name__ == '__main__':
    twitter_fetch(user, count)
I know that I presumably need to call the date using "created_at", but I'm not exactly sure where to put this in order to retrieve it. How can I do this?
As Wander Nauta said, changing the lines:
# Loop from the question: prints only the tweet text.
for status in tweepy.Cursor(api.user_timeline,id=screen_name).items(count):
    print(status.text + '\n')
to:
# Same loop, with the creation timestamp appended to each tweet's text.
for status in tweepy.Cursor(api.user_timeline,id=screen_name).items(count):
    print(status.text + ' ' + str(status.created_at) + '\n')
should print out the tweet along with the time and date of the creation of the tweet.
I am not sure whether this is exactly what you are looking for, but this code should work:
import tweepy
import json
import urllib
import sys
import datetime
from tweepy import OAuthHandler
user = "billgates"
count = 1


def twitter_fetch(screen_name=user, maxnumtweets=count):
    """Print the text and creation time of tweets from a user's timeline.

    Args:
        screen_name: Account whose timeline is read.
        maxnumtweets: Maximum number of tweets to print.
    """
    consumer_token = 'INSERT CONSUMER TOKEN'
    consumer_secret = 'INSERT CONSUMER SECRET'
    access_token = 'INSERT ACCESS TOKEN'
    access_secret = 'INSERT ACCESS SECRET'

    auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)

    # Limit by the maxnumtweets argument; the global `count` was used here
    # before, so the parameter had no effect.
    for status in tweepy.Cursor(api.user_timeline, id=screen_name).items(maxnumtweets):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(status.text + '\n')
        print(status.created_at)


if __name__ == '__main__':
    twitter_fetch(user, count)
I just added the line "print status.created_at" to your code, which will print the date and the time the tweets were created at (type is datetime.datetime).

Every 1 minute, generate a report based only on the data tweeted in last 5 minutes

My code produces a continuous stream of data, but I want to restrict it to the data from the last five minutes. Additionally, I want to report it every minute. What do I need to do for that?
try:
    import json
except ImportError:
    import simplejson as json

from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

# Placeholder credentials; replace with real values.
ACCESS_TOKEN = 'secret'
ACCESS_SECRET = 'secret'
CONSUMER_KEY = 'secret'
CONSUMER_SECRET = 'secret'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
twitter_stream = TwitterStream(auth=oauth)
iterator = twitter_stream.statuses.filter(track="car", language="en")

for tweet in iterator:
    # Only items that carry a 'text' key are reported.
    if 'text' not in tweet:
        continue
    try:
        print(tweet['user']['name'])
        print(tweet['user']['statuses_count'])
        # The original appended to a `hashtags` list that was never created;
        # the resulting NameError was swallowed by a bare `except`, so the
        # hashtags were never printed.
        # NOTE(review): the list is built per tweet here; confirm that is the
        # intended scope rather than one list accumulated across tweets.
        hashtags = [hashtag['text'] for hashtag in tweet['entities']['hashtags']]
        print(hashtags)
    except (KeyError, TypeError):
        # Skip items missing the expected fields instead of hiding every error.
        continue
Thanks in advance.

Retrieving Twitter data

I am using the code below to retrieve Twitter hashtag data with tweepy, but it only retrieves the tweet message and the creation time. I need to retrieve the metadata for that hashtag as well. Any help would be appreciated!
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import datetime
#setting up the keys
# Placeholder credentials; replace with real values before running.
consumer_key = '-------'
consumer_secret = '----------'
access_token = '--------'
access_secret = '-----------'
# Build the OAuth handler from the consumer pair, then attach the access-token pair.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
def date_range(start, end):
    """Yield datetimes from start to end (inclusive) in one-second steps.

    Args:
        start: First datetime to yield.
        end: Last datetime that may be yielded.

    Yields:
        ``start``, then each following second, up to and including ``end``.
        Nothing is yielded when ``start`` is later than ``end``.
    """
    step = datetime.timedelta(seconds=1)
    current = start
    # Same bound as the original ``(end - current).days >= 0``: a timedelta
    # has non-negative ``days`` exactly when it is not negative.
    while current <= end:
        yield current
        current += step
class TweetListener(StreamListener):
    """Stream listener that prints each status it receives."""

    def on_status(self, status):
        """Print the status text alongside timestamps from a fixed date window.

        Args:
            status: Status object delivered by the stream.
        """
        #api = tweepy.API(auth_handler=auth)
        #status.created_at += timedelta(hours=900)
        # Month written as 3, not 03: a leading-zero integer literal is a
        # syntax error on Python 3 (and an octal literal on Python 2).
        startDate = datetime.datetime(2014, 3, 11)
        stopDate = datetime.datetime(2014, 3, 13)
        # NOTE(review): this loop overwrites the tweet's own created_at with
        # every generated timestamp in the window and prints once per step.
        # The nesting of the two prints is reconstructed from statement order
        # (the original indentation was lost) -- confirm the intent.
        for date in date_range(startDate, stopDate):
            status.created_at = date
            print("tweet " + str(status.created_at) + "\n")
            print(status.text + "\n")
# Create the stream from the auth handler and a TweetListener instance.
stream = Stream(auth, TweetListener(), secure=True, )
# Term to track: an Arabic hashtag, kept as a unicode literal.
t = u"#سوريا"
# Filter the stream on that single tracked term.
stream.filter(track=[t])
this might help, a full specification of the status object.

Categories