Getting Twitter Followers using Twitter's REST API - python

I am using a Python script to get the followers of a specific user. The script runs and returns the IDs of the followers, but when I use the user lookup API it only returns 4 results. The script is like this:
#!/usr/bin/python
# Fetch the follower IDs of `username` and print the screen name of the
# looked-up followers, marking verified accounts with "*".
from twitter import *
import sys
import csv
import json
# Load the API credentials by executing config.py into the `config` dict
# (Python 2 `execfile`).
config = {}
execfile("/home/oracle/Desktop/twitter-1.17.1/config.py", config)
twitter = Twitter(
    auth = OAuth(config["access_key"], config["access_secret"],config["consumer_key"], config["consumer_secret"]))
username = "#####"
# Ask the API for the IDs of everyone following `username`.
query = twitter.followers.ids(screen_name = username)
print "found %d followers" % (len(query["ids"]))
# Walk the ID list in slices of 100.
for n in range(0, len(query["ids"]), 100):
    ids = query["ids"][n:n+100]
# NOTE(review): the indentation of this paste was lost; it is reconstructed
# here from the reported output. With the lookup outside the loop, `ids` holds
# only the last slice when it is used, which matches "found 1106 followers"
# followed by just four names.
subquery = twitter.users.lookup(user_id = ids)
for user in subquery:
    print " [%s] %s" % ("*" if user["verified"] else " ", user["screen_name"])
    # print json.dumps(user)
And it returns the output like this:
{u'next_cursor_str': u'0', u'previous_cursor': 0, u'ids': [2938672765, 1913345678, 132150958, 2469504797, 2162312397, 737550671029764097, 743699723786158082, 743503916885737473, 742612685632770048, 742487358826811392, 742384945121878020, 741959985127665664, 1541162424, 739102973830254592, 740198523724038144, 542050890, 739971273934176256, 2887662768, 738922874011013120, 738354749045669888, 737638395711791104, 737191937061584896, 329618583, 3331556957, 729645523515396096, 2220176421, 162387597, 727099914635874304, 726665274737475584, 725406360406470657, 938760691, 715260034335305729, 723912842320158720, 538208881, 2188791158, 723558257541828608, 1263571466, 720182865275842564, 719947801598259200, 636067084, 719412219168038912, 719199478260043776, 715921761158574080........ ], u'next_cursor': 0, u'previous_cursor_str': u'0'}
When i use the user look up API it only returns 4 screen names like this:
found 1106 followers
[ ] In_tRu_dEr
[ ] amanhaider3
[ ] SaaddObaid
[ ] Soerwer
I want the screen names of all the IDs present but it returns only 4. Can anyone help.

your issue is in those 2 lines
(I assumed second line is intended although it is not in the question)
for n in range(0, len(query["ids"]), 100):
ids = query["ids"][n:n+100]
those lines build a new ids list on every iteration, and each one overwrites the previous one
so on the first iteration ids holds the ids from 0 to 100
then it is overwritten with the ids from 100 to 200, and so on
until the last iteration, which covers the ids from 1100 to 1106
so when the loop ends ids only holds the last 6 ids
and apparently only 4 of those 6 are returned by twitter.users.lookup
to fix it you will need to keep everything under the for n loop
like this
for n in range(0, len(query["ids"]), 100):
ids = query["ids"][n:n+100]
subquery = twitter.users.lookup(user_id = ids)
for user in subquery:
print " [%s] %s" % ("*" if user["verified"] else " ", user["screen_name"])
this will work

Related

Reduce time complexity for a working model (python3)

I have a working model for a chat application. The requirement is such that upon service restart, we design an in-mem mapper and fetch the first page details for each DM / group from that mapper based on the ID.
The working model is as follows:
'''
RECEIVER_SENDER_MAPPER = {"61e7dbcf9edba13755a4eb07" : {"61e7a5559edba13755a4ea65":[{},{},{},{},first page entries(25)],
"61de751742fc165ec8b729c9":[{},{},{},{},first page entries(25)]},
"61e7a5559edba13755a4ea65" : {"61e7dbcf9edba13755a4eb07":[{},{},{},{},first page entries(25)],
"61de751742fc165ec8b729c9":[{},{},{},{},first page entries(25)]}
}
'''
# In-memory mapper: user id -> {other user id / group id -> list of messages}.
RECEIVER_SENDER_MAPPER = {}
def sync_to_inmem_from_db():
    """Rebuild RECEIVER_SENDER_MAPPER from the database.

    For every user, collect the groups the user is a member of plus all other
    users, and store up to 50 messages (sorted on "created_datetime" with
    direction -1) for each of them that has any. The result replaces the
    module-level RECEIVER_SENDER_MAPPER. Nothing is returned.

    NOTE(review): the indentation of this paste was lost and has been
    reconstructed from the statement order - confirm against the real file.
    """
    global RECEIVER_SENDER_MAPPER
    # Only build the mapper when the messages collection is not empty.
    message = db.messages.find_one()
    if message:
        #Fetch all users from db
        users = list(db.users.find({},{"_id":1, "username":1}))
        prepared_data = {}
        # Counts loop iterations; never read inside this function.
        counter = 0
        for user in users:
            counter += 1
            #find all message groups which user is a part of
            user_channel_ids = list(db.message_groups.find({"channel_members":{"$in":[user["_id"]]}},{"_id":1, "channel_name":1}))
            combined_list = user_channel_ids + users
            users_mapped_data = {}
            for x in combined_list:
                counter += 1
                # Skip the user's own entry in the combined list.
                if x["_id"] == user["_id"]:
                    continue
                # Default: x is a group, so filter on the group's id.
                find_query = {"receiver_type":"group", "receiver_id":x["_id"], "pid":"0"}
                if x.get("username"):
                    # x is a user document (only those carry "username").
                    # NOTE(review): this query has no sender/receiver id filter,
                    # so every user pair gets the same result - confirm whether
                    # the ids were left out of the post.
                    find_query = {"pid":"0", "receiver_type":"user"}
                # One database round trip per (user, other) pair.
                # NOTE(review): the sample above mentions 25 entries per page
                # while this fetches 50 - confirm which is intended.
                messages = list(db.messages.find(find_query).sort("created_datetime",
                    -1).limit(50))
                if messages:
                    users_mapped_data[x["_id"]] = messages
            prepared_data[user["_id"]] = users_mapped_data
        RECEIVER_SENDER_MAPPER = prepared_data
# Populate the mapper at import time if it is still empty.
if not RECEIVER_SENDER_MAPPER:
    sync_to_inmem_from_db()
The value of the counter for 70 users and 48 message groups is 5484, and it takes close to 9 minutes to create the RECEIVER_SENDER_MAPPER.
I have to reduce this to at most 1/4th of that value.
One optimization i found was, since group messages will be same for all the users of the particular group, i can just create a dictionary this way:
# Fetch every message group once, then map each group's _id to (at most) 50 of
# the messages whose receiver_id is that group - one query per group instead of
# one per (user, group) pair.
all_channels = list(db.message_groups.find())
channels_data = {channel["_id"] : list(db.messages.find({"receiver_id":channel["_id"]}).limit(50)) for channel in all_channels}
But here again, while looping the users, i have to again loop the groups to find if the "user" is a part of that group or not.!
Any idea to reduce the complexity of this ? Thanks in advance.

A log file which contains information like < timestamp , customer-id , page-id ,list of titles>

Write a code to print all the unique customers visited in last hour
My try:
import datetime
def find_repeated_customer(path=" my file path"):
    """Return the ids of customers who came back at least one day after a visit.

    Each non-empty line of the log is expected to look like
    ``<timestamp> : <customer-id> : <page-id>[ : <titles>]`` where the
    timestamp starts with a ``YYYY-MM-DD`` date, e.g.
    ``2016-10-29 01:03:26.947000``. Lines are assumed to be in chronological
    order.

    Args:
        path: Location of the log file. Defaults to the placeholder path used
            by the original snippet.

    Returns:
        A set of customer ids whose visit date is at least one day later than
        their previous visit date.

    Raises:
        ValueError: If a non-empty line has fewer than two fields or its date
            cannot be parsed.
    """
    customer_last_visit = {}
    repeat_customer = set()
    # `with` closes the file even if a malformed line raises.
    with open(path, "r") as file_obj:
        for line in file_obj:
            line = line.strip()
            if not line:
                continue
            # Only the first two fields are needed; a trailing page id or
            # title list must not break the unpacking.
            timestamp, customer_id = line.split(" : ")[:2]
            # datetime.date needs integers, not the substrings themselves.
            year, month, day = (int(part) for part in timestamp.split(" ")[0].split("-"))
            current_visit = datetime.date(year, month, day)
            last_visit = customer_last_visit.get(customer_id)
            # Subtracting dates yields a timedelta; compare its day count.
            if last_visit is not None and (current_visit - last_visit).days >= 1:
                repeat_customer.add(customer_id)
            # Record every visit, including the first one, so the next visit
            # has something to be compared with.
            customer_last_visit[customer_id] = current_visit
    return repeat_customer
I am completely failing over in order to get my desired output. By doing this I am able to get repeated customers in last one day but how to get unique users?
You can't do this kind of manipulation in one pass. You have to pass through the lines once to collect the customers, and only then can you check who came only once. In a second pass, you check whether the current customer is in the list of once-only customers and handle them accordingly.

Get more than 50 members of a group from Soundcloud

I'm trying to get all the members of a SoundCloud group using the Python API.
So far I can get the first 50, but providing the mentioned "linked_partitioning=1" argument doesn't seem to move on to the next set of members.
My code is:
# Login and authenticate
client = soundcloud.Client(client_id = clientId,
                           client_secret = clientSecret,
                           username = username,
                           password = password)
# print authenticated user's username
print client.get('/me').username
# Get members
count = 1
total = 0;
# Keep requesting while the previous request returned something and fewer than
# `max` members have been counted (`max` is presumably a limit defined
# elsewhere - it is not set in this snippet).
while count > 0 and total < max:
    # NOTE(review): the request arguments never change between iterations, so
    # every pass asks for the same thing; nothing from the previous response
    # is used to move on to a following page.
    members = client.get('/resolve', url=group_url, linked_partitioning=1)
    count = len(members)
    total += count
    # Debug Output
    print "Total: " + str(total) + ". Retrieved another " + str(count) + " members."
    print members[0].username
I've been looking at: https://developers.soundcloud.com/docs/api/guide#pagination but still haven't managed to find a solution.
Snippet of working PHP code using linked_partitioning and limit (the maximum value is 200). The default result set size is 50.
I use the limit with all the endpoints, but have not as yet touched groups, so I can't verify that it works there.
// Build the request URL for one user's tracks; the result of
// getClientId(true) is appended as the start of the query string.
$qryString = self::API_ENDPOINT . self::SEARCH_API_ARTISTS
    . "/" . $userId ."/tracks?" . $this->getClientId(true);
// Ask for MAX_API_PAGE_SIZE results per page instead of the default.
$qryString .= "&limit=" . self::MAX_API_PAGE_SIZE;
// Turn on linked partitioning (paginated responses).
$qryString .= "&linked_partitioning=1";

How do I join results of looping script into a single variable?

I have looping script returning different filtered results, I can make this data return as an array for each of the different filter classes. However I am unsure of the best method to join all of these arrays together.
import mechanize
import urllib
import json
import re
import random
import datetime
from sched import scheduler
from time import time, sleep
from sets import Set
##### Scheduler used to re-run the scraper: time() is its clock, sleep() its delay function
s = scheduler(time, sleep)
random.seed()
##### Code to stop duplicates part 1: user ids that have already been handled
userset = set ()
def run_periodically(start, end, interval, func):
    """Queue ``func`` on the module scheduler from ``start`` until ``end``, then run it.

    Consecutive runs are ``interval`` plus a random offset drawn from
    ``random.randrange(-5, 10)`` apart. All events are queued first; the
    scheduler is started afterwards.
    """
    next_run = start
    while next_run < end:
        s.enterabs(next_run, 0, func, ())
        # Jitter the spacing so the calls are not perfectly regular.
        jitter = random.randrange(-5, 10)
        next_run += interval + jitter
    s.run()
##### Code to get the data required from the URL desired
def getData():
    """Fetch the listing from ``post_url`` and classify each new user by strike rate.

    Posts the paging/sorting parameters, parses the JSON reply and, for every
    row whose user id is not yet in the module-level ``userset``, computes the
    strike rate (strikes / selections * 100) from the text embedded in the
    row's ``username`` cell. Rates strictly between 50 and 100 fill
    ``arraym`` ("System M"); rates strictly between 0 and 50 fill ``arrayna``
    ("System NA").

    NOTE: ``arraym`` and ``arrayna`` are rebuilt for every row and nothing is
    returned, so only the values of the last row survive the loop.
    """
    post_url = "URL OF INTEREST"
    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', 'Firefox')]
    ##### These are the parameters you've got from checking with the aforementioned tools
    parameters = {'page' : '1',
                  'rp' : '250',
                  'sortname' : 'race_time',
                  'sortorder' : 'asc'
                  }
    ##### Encode the parameters
    data = urllib.urlencode(parameters)
    trans_array = browser.open(post_url,data).read().decode('UTF-8')
    xmlload1 = json.loads(trans_array)
    # Patterns are compiled once, outside the row loop.
    pattern2 = re.compile('/control/profile/view/(.*)\' title=')
    pattern4 = re.compile('title=\'posted: (.*) strikes:')
    pattern5 = re.compile('strikes: (.*)\'><img src=')
    for row in xmlload1['rows']:
        cell = row["cell"]
        ##### defining the Keys (key is the area from which data is pulled in the XML) for use in the pattern finding/regex
        user_delimiter = cell['username']
        selection_delimiter = cell['race_horse']
        user_numberofselections = float(re.findall(pattern4, user_delimiter)[0])
        user_numberofstrikes = float(re.findall(pattern5, user_delimiter)[0])
        strikeratecalc1 = user_numberofstrikes/user_numberofselections
        strikeratecalc2 = strikeratecalc1*100
        userid_delimiter_results = (re.findall(pattern2, user_delimiter)[0])
        ##### Code to stop duplicates throughout the day part 2 (skips if the id is already in the userset)
        if userid_delimiter_results in userset:
            continue
        userset.add(userid_delimiter_results)
        arraym = ""
        arrayna = ""
        # The original conditions ended in a stray ")" (a SyntaxError).
        if 50 < strikeratecalc2 < 100:
            arraym0 = "System M"
            arraym1 = "user id = ",userid_delimiter_results
            arraym2 = "percantage = ",strikeratecalc2,"%"
            arraym3 = ""
            arraym = [arraym0, arraym1, arraym2, arraym3]
        if 0 < strikeratecalc2 < 50:
            arrayna0 = "System NA"
            arrayna1 = "user id = ",userid_delimiter_results
            arrayna2 = "percantage = ",strikeratecalc2,"%"
            arrayna3 = ""
            arrayna = [arrayna0, arrayna1, arrayna2, arrayna3]
# Run the scraper once right away, then schedule repeated runs starting 5
# seconds from now, for the next 1,000,000 seconds, with a base interval of 10.
getData()
run_periodically(time()+5, time()+1000000, 10, getData)
What I want to be able to do is return both 'arraym' and 'arrayna' as one final array. However, due to the looping nature of the script, the old 'arraym'/'arrayna' are overwritten on each pass of the loop, so my attempts to yield one array containing all of the data have only produced the last user id for 'System M' and the last user id for 'System NA'. I do not know of a way to get around this so that all of my data is accumulated in one array. Please note, I have been coding for a cumulative two weeks now, so there may well be some simple function to overcome this problem.
Kind regards AEA
Without looking at that huge code segment, typically you can do something like:
my_array = [] # Create an empty list
for <some loop>:
    # Inside the loop: add one value per iteration instead of reassigning.
    my_array.append(some_value)
# At this point, my_array is a list containing some_value for each loop iteration
print(my_array)
Look into python's list.append()
So your code might look something like:
#...
# Create the result lists once, before the loop, so that each row adds to them
# instead of replacing them.
arraym = []
arrayna = []
for row in xmlload1['rows']:
    #...
    if strikeratecalc2 > 50 and strikeratecalc2 < 100:
        arraym.append("System M")
        arraym.append("user id = %s" % userid_delimiter_results)
        arraym.append("percantage = %s%%" % strikeratecalc2)
        arraym.append("")
    if strikeratecalc2 > 0 and strikeratecalc2 < 50:
        arrayna.append("System NA")
        arrayna.append("user id = %s" % userid_delimiter_results)
        arrayna.append("percantage = %s%%" % strikeratecalc2)
        arrayna.append("")
#...

Need to handle keyerror exception python

I'm getting a keyerror exception when I input a player name here that is not in the records list. I can search it and get back any valid name, but if I input anything else, i get a keyerror. I'm not really sure how to go about handling this since it's kindof confusing already dealing with like 3 sets of data created from parsing my file.
I know this code is bad I'm new to python so please excuse the mess - also note that this is a sortof test file to get this functionality working, which I will then write into functions in my real main file. Kindof a testbed here, if that makes any sense.
This is what my data file, stats4.txt, has in it:
[00000] Cho'Gath - 12/16/3 - Loss - 2012-11-22
[00001] Fizz - 12/5/16 - Win - 2012-11-22
[00002] Caitlyn - 13/4/6 - Win - 2012-11-22
[00003] Sona - 4/5/9 - Loss - 2012-11-23
[00004] Sona - 2/1/20 - Win - 2012-11-23
[00005] Sona - 6/3/17 - Loss - 2012-11-23
[00006] Caitlyn - 14/2/16 - Win - 2012-11-24
[00007] Lux - 10/2/14 - Win - 2012-11-24
[00008] Sona - 8/1/22 - Win - 2012-11-27
Here's my code:
import re
# Parse stats4.txt, print every game, then print the games whose player name
# contains the text typed by the user.
# NOTE(review): the indentation of this paste was lost and has been
# reconstructed from the statement order and the behaviour described below.
info = {}      # player name -> game dict; a later game of the same player replaces the earlier one
records = []   # every game as (gameid, game dict), in file order
search = []    # game dicts that match the typed name
with open('stats4.txt') as data:
    for line in data:
        # Take the text before "]" and drop the leading "[" to get the game id.
        gameid = [item.strip('[') for item in line.split(']')]
        del gameid[-1]
        gameidstr = ''.join(gameid)
        gameid = gameidstr
        # Drop the first 7 characters ("[00000]") before splitting the rest.
        line = line[7:]
        # At most 3 splits, so the dashes inside the date stay together.
        player, stats, outcome, date = [item.strip() for item in line.split('-', 3)]
        stats = dict(zip(('kills', 'deaths', 'assists'), map(int, stats.split('/'))))
        date = tuple(map(int, date.split('-')))
        info[player] = dict(zip(('gameid', 'player', 'stats', 'outcome', 'date'), (gameid, player, stats, outcome, date)))
        records.append(tuple((gameid, info[player])))
print "\n\n", info, "\n\n" #print the info dictionary just to see
champ = raw_input() #get champion name
#print info[champ].get('stats').get('kills'), "\n\n"
#print "[%s] %s - %s/%s/%s - %s-%s-%s" % (info[champ].get('gameid'), champ, info[champ].get('stats').get('kills'), info[champ].get('stats').get('deaths'), info[champ].get('stats').get('assists'), info[champ].get('date')[0], info[champ].get('date')[1], info[champ].get('date')[2])
#print "\n\n"
#print info[champ].values()
i = 0
for item in records: #this prints out all records
    print "\n", "[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (records[i][0], records[i][1]['player'], records[i][1]['stats']['kills'], records[i][1]['stats']['deaths'], records[i][1]['stats']['assists'], records[i][1]['outcome'], records[i][1]['date'][0], records[i][1]['date'][1], records[i][1]['date'][2])
    i = i + 1
print "\n" + "*" * 50
i = 0
for item in records:
    # `in` on two strings is a substring test, so an empty `champ` matches
    # every player and every record ends up in `search`.
    if champ in records[i][1]['player']:
        search.append(records[i][1])
    else:
        pass
    i = i + 1
s = 0
if not search:
    print "no availble records" #how can I get this to print even if nothing is inputted in raw_input above for champ?
print "****"
for item in search:
    print "\n[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (search[s]['gameid'], search[s]['player'], search[s]['stats']['kills'], search[s]['stats']['deaths'], search[s]['stats']['assists'], search[s]['outcome'], search[s]['date'][0], search[s]['date'][1], search[s]['date'][2])
    s = s + 1
I tried setting up a Try; Except sort of thing but I couldn't get any different result when entering an invalid player name. I think I could probably set something up with a function and returning different things if the name is present or not but I think I've just gotten myself a bit confused. Also notice that no match does indeed print for the 8 records that aren't matches, though thats not quite how I want it to work. Basically I need to get something like that for any invalid input name, not just a valid input that happens to not be in a record in the loop.
Valid input names for this data are:
Cho'Gath, Fizz, Caitlyn, Sona, or Lux - anything else gives a keyerror, thats what I need to handle so it doesn't raise an error and instead just prints something like "no records available for that champion" (and prints that only once, rather then 8 times)
Thanks for any help!
[edit] I was finally able to update this code in the post (thank you martineau for getting it added in; for some reason backticks weren't working to block code and it was showing up as bold normal text when I pasted). Anyway, look at `if not search`: how can I get that to print even if nothing is entered at all? When I just press return at raw_input, it currently prints all records after **** even though I didn't give it any champ to search for.
where is your exact error occurring?
i'm just assuming it is when champ = raw_input() #get champion name
and then info[champ]
you can either check if the key exists first
# Test for the key before indexing, so an unknown name never raises KeyError.
if champ not in info:
    print 'no records avaialble'
or use get
if info.get(champ)
or you can just try and access the key
# Index directly and handle the KeyError raised for an unknown name.
try:
    info[champ]
    # do stuff
except KeyError:
    print 'no records available'
the more specific you can be in your question the better, although you explained your problem you really didn't include any specifics Please always include a traceback if available, and post the relevant code IN your post not on a link.
Here's some modifications that I think address your problem. I also reformatted the code to make it a little more readable. In Python it's possible to continue long lines onto the next either by ending with a \ or just going to the next line if there's an unpaired '(' or '[' on the previous line.
Also, the way I put code in my questions or answer here is by cutting it out of my text editor and then pasting it into the edit window, after that I make sure it's all selected and then just use the {} tool at the top of edit window to format it all.
import re
from pprint import pprint
info = {}
records = []
with open('stats4.txt') as data:
for line in data:
gameid = [item.strip('[') for item in line.split(']')]
del gameid[-1]
gameidstr = ''.join(gameid)
gameid = gameidstr
line = line[7:]
player, stats, outcome, date = [item.strip() for item in line.split('-', 3)]
stats = dict(zip(('kills', 'deaths', 'assists'), map(int, stats.split('/'))))
date = tuple(map(int, date.split('-')))
info[player] = dict(zip(('gameid', 'player', 'stats', 'outcome', 'date'),
(gameid, player, stats, outcome, date)))
records.append(tuple((gameid, info[player])))
#print "\n\n", info, "\n\n" #print the info dictionary just to see
pprint(info)
champ = raw_input("Champ's name: ") #get champion name
#print info[champ].get('stats').get('kills'), "\n\n"
#print "[%s] %s - %s/%s/%s - %s-%s-%s" % (
# info[champ].get('gameid'), champ, info[champ].get('stats').get('kills'),
# info[champ].get('stats').get('deaths'), info[champ].get('stats').get('assists'),
# info[champ].get('date')[0], info[champ].get('date')[1],
# info[champ].get('date')[2])
#print "\n\n"
#print info[champ].values()
i = 0
for item in records: #this prints out all records
print "\n", "[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (
records[i][0], records[i][1]['player'], records[i][1]['stats']['kills'],
records[i][1]['stats']['deaths'], records[i][1]['stats']['assists'],
records[i][1]['outcome'], records[i][1]['date'][0],
records[i][1]['date'][1], records[i][1]['date'][2])
i = i + 1
print "\n" + "*" * 50
i = 0
search = []
for item in records:
if champ in records[i][1]['player']:
search.append(records[i][1])
i = i + 1
if not search:
print "no match"
exit()
s = 0
for item in search:
print "\n[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (search[s]['gameid'],
search[s]['player'], search[s]['stats']['kills'],
search[s]['stats']['deaths'], search[s]['stats']['assists'],
search[s]['outcome'], search[s]['date'][0], search[s]['date'][1],
search[s]['date'][2])
s = s + 1

Categories