Wrong followers number - python

I'm using tweepy, and I count the number of followers like this:

i = 0
for follower in tweepy.Cursor(api.followers).items():
    followers_results[follower.id] = follower.screen_name
    i += 1
print(i)
But I obtain a different result if I do this:
auth = tweepy.OAuthHandler('key', 'consumer_secret')
auth.set_access_token('access_token', 'access_token_secret')
api = tweepy.API(auth, wait_on_rate_limit=True)
user = api.get_user('username')
print(user.followers_count)
It seems the 2nd method gives the real number of followers (it matches my account page on Twitter), while the 1st always gives the number of followers minus 3.
Why don't both methods give the same result? And which one should I trust?
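For reference, a third way to count, which may help decide which number to trust, is paging through follower IDs (a sketch, assuming the same authenticated tweepy 3.x api object; followers_ids returns up to 5,000 IDs per page):

ids = []
for page in tweepy.Cursor(api.followers_ids, screen_name='username').pages():
    ids.extend(page)
# compare this count against user.followers_count
print(len(ids))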

Related

How to get more than 100 units using Instagram Private Api?

I found a very cool library called Instagram Private Api, and I'm trying to get all the people subscribed to my Instagram account for educational purposes.
But I can't get more than 100 followers :(
Can somebody help me understand how to fix it?
from random import randint
from time import sleep

user_id = api.username_info('target')['user']['pk']

# Create a list of followers' usernames
usernames = []
followers = api.user_followers(user_id, rank_token=api.generate_uuid())
next_max_id = followers.get('next_max_id')
while next_max_id:
    delay = randint(20, 40)
    print("Sleep " + str(delay) + "s")
    sleep(delay)
    # Get a list of the user's followers
    followers = api.user_followers(user_id, rank_token=api.generate_uuid())
    next_max_id = followers.get('next_max_id')
    for follower in followers['users']:
        usernames.append(follower['username'])

# Print the list of followers' usernames
print(len(usernames))
From the documentation: you can pass max_id to offset the selection of users.
You can also look at the provided example.
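A minimal sketch of that loop, assuming api is an authenticated instagram_private_api client and that user_followers accepts a max_id keyword, as the documentation describes:

usernames = []
rank_token = api.generate_uuid()
followers = api.user_followers(user_id, rank_token=rank_token)
while True:
    for follower in followers['users']:
        usernames.append(follower['username'])
    next_max_id = followers.get('next_max_id')
    if not next_max_id:
        break
    # pass the previous page's next_max_id back as max_id to get the next page
    followers = api.user_followers(user_id, rank_token=rank_token, max_id=next_max_id)
print(len(usernames))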

How to collect Tweets in a JSON file on Twitter using Python?

I'm building a program that collects a specified number of tweets (no specific hashtags, just random posts) from a specific country (based on coordinates) over the span of 1-2 months.
For example, I'm collecting 200 tweets/status updates from the United States which were posted anywhere between September and October.
The reason I'm doing this is that I want to gather these tweets and perform sentiment analysis on them, to see whether the average tweet from a specified country is negative or positive.
The problem I'm having is that I don't know how to "filter" for random tweets/status updates, because these kinds of tweets don't have hashtags. Furthermore, I'm not sure whether Twitter allows me to collect tweets which are 2 months old. Any suggestions?
Code:
import tweepy
from tweepy import OAuthHandler
import json
import datetime as dt
import time
import os
import sys

'''
I created a twitter account for anyone to use if they want to test the code!
I used Python 3 and tweepy version 3.5.0.
'''

def load_api():
    ''' Function that loads the twitter API after authorizing the user. '''
    consumer_key = 'nn'
    consumer_secret = 'nn'
    access_token = 'nn'
    access_secret = 'nnn'
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    # load the twitter API via tweepy
    return tweepy.API(auth, wait_on_rate_limit=True)
def tweet_search(api, query, max_tweets, max_id, since_id, geocode):
    ''' Function that takes in a search string 'query', the maximum
        number of tweets 'max_tweets', and the minimum (i.e., starting)
        tweet id. It returns a list of tweepy.models.Status objects. '''
    searched_tweets = []
    while len(searched_tweets) < max_tweets:
        remaining_tweets = max_tweets - len(searched_tweets)
        try:
            new_tweets = api.search(q=query, count=remaining_tweets,
                                    since_id=str(since_id),
                                    max_id=str(max_id - 1))
                                    # geocode=geocode)
            print('found', len(new_tweets), 'tweets')
            if not new_tweets:
                print('no tweets found')
                break
            searched_tweets.extend(new_tweets)
            max_id = new_tweets[-1].id
        except tweepy.TweepError:
            print('exception raised, waiting 15 minutes')
            print('(until:', dt.datetime.now() + dt.timedelta(minutes=15), ')')
            time.sleep(15 * 60)
            break  # stop the loop
    return searched_tweets, max_id
def get_tweet_id(api, date='', days_ago=9, query='a'):
    ''' Function that gets the ID of a tweet. This ID can then be
        used as a 'starting point' from which to search. The query is
        required and has been set to a commonly used word by default.
        The variable 'days_ago' has been initialized to the maximum
        amount we are able to search back in time (9). '''
    if date:
        # return an ID from the start of the given day
        td = date + dt.timedelta(days=1)
        tweet_date = '{0}-{1:0>2}-{2:0>2}'.format(td.year, td.month, td.day)
        tweet = api.search(q=query, count=1, until=tweet_date)
    else:
        # return an ID from __ days ago
        td = dt.datetime.now() - dt.timedelta(days=days_ago)
        tweet_date = '{0}-{1:0>2}-{2:0>2}'.format(td.year, td.month, td.day)
        # get list of up to 10 tweets
        tweet = api.search(q=query, count=10, until=tweet_date)
        print('search limit (start/stop):', tweet[0].created_at)
    # return the id of the first tweet in the list
    return tweet[0].id
def write_tweets(tweets, filename):
    ''' Function that appends tweets to a file. '''
    with open(filename, 'a') as f:
        for tweet in tweets:
            json.dump(tweet._json, f)
            f.write('\n')
def main():
    ''' This is a script that continuously searches for tweets
        that were created over a given number of days. The search
        dates and search phrase can be changed below. '''

    ''' search variables: '''
    search_phrases = ['#PythonPleaseWork']
    time_limit = 1.0                        # runtime limit in hours
    max_tweets = 20                         # number of tweets per search, but it doesn't seem to be working
    min_days_old, max_days_old = 1, 1       # search limits, e.g. from 7 to 8
                                            # gives the current weekday from last week;
                                            # min_days_old=0 will search from right now
    USA = '39.8,-95.583068847656,2500km'    # this geocode includes nearly all American
                                            # states (and a large portion of Canada),
                                            # but it still fetches from outside the USA

    # loop over search items, creating a new file for each
    for search_phrase in search_phrases:
        print('Search phrase =', search_phrase)

        ''' other variables '''
        name = search_phrase.split()[0]
        json_file_root = name + '/' + name
        os.makedirs(os.path.dirname(json_file_root), exist_ok=True)
        read_IDs = False

        # open a file in which to store the tweets
        if max_days_old - min_days_old == 1:
            d = dt.datetime.now() - dt.timedelta(days=min_days_old)
            day = '{0}-{1:0>2}-{2:0>2}'.format(d.year, d.month, d.day)
        else:
            d1 = dt.datetime.now() - dt.timedelta(days=max_days_old - 1)
            d2 = dt.datetime.now() - dt.timedelta(days=min_days_old)
            day = '{0}-{1:0>2}-{2:0>2}_to_{3}-{4:0>2}-{5:0>2}'.format(
                  d1.year, d1.month, d1.day, d2.year, d2.month, d2.day)
        json_file = json_file_root + '_' + day + '.json'
        if os.path.isfile(json_file):
            print('Appending tweets to file named: ', json_file)
            read_IDs = True

        # authorize and load the twitter API
        api = load_api()

        # set the 'starting point' ID for tweet collection
        if read_IDs:
            # open the json file and get the latest tweet ID
            with open(json_file, 'r') as f:
                lines = f.readlines()
                max_id = json.loads(lines[-1])['id']
                print('Searching from the bottom ID in file')
        else:
            # get the ID of a tweet that is min_days_old
            if min_days_old == 0:
                max_id = -1
            else:
                max_id = get_tweet_id(api, days_ago=(min_days_old - 1))

        # set the smallest ID to search for
        since_id = get_tweet_id(api, days_ago=(max_days_old - 1))
        print('max id (starting point) =', max_id)
        print('since id (ending point) =', since_id)

        ''' tweet gathering loop '''
        start = dt.datetime.now()
        end = start + dt.timedelta(hours=time_limit)
        count, exitcount = 0, 0
        while dt.datetime.now() < end:
            count += 1
            print('count =', count)
            # collect tweets and update max_id
            tweets, max_id = tweet_search(api, search_phrase, max_tweets,
                                          max_id=max_id, since_id=since_id,
                                          geocode=USA)
            # write tweets to file in JSON format
            if tweets:
                write_tweets(tweets, json_file)
                exitcount = 0
            else:
                exitcount += 1
                if exitcount == 3:
                    if search_phrase == search_phrases[-1]:
                        sys.exit('Maximum number of empty tweet strings reached - exiting')
                    else:
                        print('Maximum number of empty tweet strings reached - breaking')
                        break

if __name__ == "__main__":
    main()
You cannot get 2 months of historical data with the Search API:
"The Twitter Search API searches against a sampling of recent Tweets published in the past 7 days. Before getting involved, it's important to know that the Search API is focused on relevance and not completeness. This means that some Tweets and users may be missing from search results."
https://developer.twitter.com/en/docs/tweets/search/overview/basic-search
You can use the Streaming API with a country filter, and instead of hashtags you can track a few stop words: for the US, for example, "the, and"; for France, "le, la, et"; etc.
In addition, it is not a good idea to share your access tokens.
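A minimal sketch of that streaming approach, assuming tweepy 3.x (the version used in the question) and an authenticated api object; note that the streaming endpoint combines track and locations with OR, so the country is re-checked on each status:

class CountryListener(tweepy.StreamListener):
    def __init__(self, api, max_tweets=200):
        super().__init__(api)
        self.collected = []
        self.max_tweets = max_tweets

    def on_status(self, status):
        # keep only statuses actually tagged with a US place
        if status.place is not None and status.place.country_code == 'US':
            self.collected.append(status._json)
        if len(self.collected) >= self.max_tweets:
            return False  # returning False disconnects the stream

# rough bounding box for the continental US: SW lon, SW lat, NE lon, NE lat
listener = CountryListener(api)
stream = tweepy.Stream(auth=api.auth, listener=listener)
stream.filter(locations=[-125.0, 24.0, -66.0, 50.0], track=['the', 'and'])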

Tweepy: get all friends of a sample of twitter accounts: how to handle protected users

I want to look up all the friends (meaning the twitter users one is following) of a sample of friends of one twitter account, to see what other friends they have in common. The problem is that I don't know how to handle protected accounts, and I keep running into this error:
tweepy.error.TweepError: Not authorized.
This is the code I have:
...
screen_name = ----
file_name = "followers_data/follower_ids-" + screen_name + ".txt"
with open(file_name) as file:
    ids = file.readlines()

num_samples = 30
ids = [x.strip() for x in ids]
friends = [[] for i in range(num_samples)]

for i in range(0, num_samples):
    id = random.choice(ids)
    for friend in tweepy.Cursor(api.friends_ids, id).items():
        print(friend)
        friends[i].append(friend)
I have a list of all friends from one account screen_name, from which I load the friend ids. I then want to sample a few of those and look up their friends.
I have also tried something like this:
def limit_handled(cursor, name):
    try:
        yield cursor.next()
    except tweepy.TweepError:
        print("Something went wrong... ", name)

for i in range(0, num_samples):
    id = random.choice(ids)
    items = tweepy.Cursor(api.friends_ids, id).items()
    for friend in limit_handled(items, id):
        print(friend)
        friends[i].append(friend)
But then it seems like only one friend per sample friend is stored before moving on to the next sample. I'm pretty new to Python and Tweepy so if anything looks weird, please let me know.
First of all, a couple of comments on naming. The names file and id are Python built-ins, so you should avoid using them as variable names; I have changed these.
Secondly, when you initialise your tweepy API, it's clever enough to deal with rate limits if you use wait_on_rate_limit=True and will inform you when it's delayed due to rate limits if you use wait_on_rate_limit_notify=True.
You also lose some information when you set friends = [[] for i in range(num_samples)], as you then won't be able to associate the friends you find with the account they relate to. You can instead use a dictionary, which will associate each ID used with the friends found, allowing for better processing.
My corrected code is as follows:
import tweepy
import random

consumer_key = '...'
consumer_secret = '...'
access_token = '...'
access_token_secret = '...'

# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Creation of the actual interface, using authentication. Use rate limits.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

screen_name = '----'
file_name = "followers_data/follower_ids-" + screen_name + ".txt"
with open(file_name) as f:
    ids = [x.strip() for x in f.readlines()]

num_samples = 30
friends = dict()

# Initialise i
i = 0

# We want to check that i is less than our number of samples, but we also need to make
# sure there are IDs left to choose from.
while i < num_samples and ids:
    current_id = random.choice(ids)
    # remove the ID we're testing from the list, so we don't pick it again
    ids.remove(current_id)
    try:
        # try to get friends, and add them to our dictionary value if we can;
        # use .get() to cope with the first loop.
        for page in tweepy.Cursor(api.friends_ids, current_id).pages():
            friends[current_id] = friends.get(current_id, []) + page
        i += 1
    except tweepy.TweepError:
        # we get a TweepError when we can't view a user - skip them and move on to the
        # next. Don't increment i, as we want to replace this user with someone else.
        print('Could not view user {}, skipping...'.format(current_id))
The output is a dictionary, friends, with keys of user IDs and items of the friends for each user.
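Since the goal is to see which friends the sampled accounts have in common, the dictionary makes that straightforward, for example:

# intersect the friend-ID lists of all sampled accounts
if friends:
    common = set.intersection(*(set(v) for v in friends.values()))
    print('Friends shared by every sampled account:', common)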

Tweepy Look up ID with username

I am trying to get a list of IDs from a list of usernames I have. Is there any method that tweepy provides that lets me look up user IDs using their usernames?
Twitter API has the resource https://dev.twitter.com/rest/reference/get/users/lookup for such requirements. It can return user objects for at most 100 users at a time.
You can use this in Tweepy like:
user_objects = api.lookup_users(screen_names=list_of_at_most_100_screen_names)
user_ids = [user.id_str for user in user_objects]
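For longer lists, a sketch that chunks the input into batches of 100 (the documented per-request maximum), assuming list_of_screen_names holds your full list:

user_ids = []
for i in range(0, len(list_of_screen_names), 100):
    batch = list_of_screen_names[i:i + 100]
    user_ids.extend(user.id_str for user in api.lookup_users(screen_names=batch))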
# my username df
# 0    briankrebs
# 1    Dejan_Kosutic
# 2    msftsecresponse
# 3    PrivacyProf
# 4    runasand
screen_name = unames['username']

data = []

def return_twitterid(screen_name):
    print("The screen name is: " + screen_name)
    twitterid = client.get_user(username=screen_name)
    return twitterid.data.id

for s in range(len(screen_name)):
    u_id = return_twitterid(screen_name[s])
    data.append(u_id)
print(data)
For anyone who landed here from Google, this is a code snippet that is basically a username-to-ID converter. It supports Twitter API v2.
# the usernames variable is a list containing all the usernames you want to convert
usernames = ["POTUS", "VP"]
users = client.get_users(usernames=usernames)
for user in users.data:
    print(user.id)
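This assumes client is a tweepy.Client authenticated for API v2, created e.g. like this (the bearer token is a placeholder):

import tweepy

client = tweepy.Client(bearer_token='YOUR_BEARER_TOKEN')

Like the v1.1 lookup above, get_users accepts at most 100 usernames per call.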

Twitter search limit has never been this strict

I know that the Twitter Search API has its own limitations and returns far fewer results than actually exist, but I was searching through a popular hashtag and it only returned 60 results, which is not acceptable at all!
Here is my code, which uses the twython module.
results = {}
last_id = None
count = 0
while len(results.keys()) != min_count:
    if last_id:
        tmp_results = self.api.search(q="#mentionsomeoneimportantforyou", count=100, max_id=last_id)
    else:
        tmp_results = self.api.search(q="#mentionsomeoneimportantforyou", count=100)
    count += len(tmp_results['statuses'])
    print("new len: ", count)
    last_id = get_max_id(tmp_results)

def get_max_id(results):
    next_results_url_params = results['search_metadata']['next_results']
    next_max_id = next_results_url_params.split('max_id=')[1].split('&')[0]
    return next_max_id
Is there anything wrong with this code? If not, isn't 60 out of so many a joke?
The twython docs suggest not doing it that way, using the cursor approach instead:
twitter = Twython(APP_KEY, APP_SECRET,
                  OAUTH_TOKEN, OAUTH_TOKEN_SECRET)

results = twitter.cursor(twitter.search, q='python')

count = 0
for result in results:
    print(result['id_str'])
    count += 1
print(count)
prints:
... many here ...
561918886380322816
561918859050229761
561919180480737282
561919151162130434
561919142450581504
561919113812246529
561919107134922753
561919103867559938
561919077481218049
561918994454556672
561918971755372546
561918962381127680
561918948288258048
561918911751655425
561918904126042112
561918886380322816
561918859050229761
645
I think I found the reason. According to this link, Twitter doesn't return tweets older than a week through the Search API.
