asynchronous aiohttp requests send nothing, but synchronous requests succeed - python

I'm trying to get coin value on parasawp but the Async way does not work while the sync yes.
I've tried to remove ssl, wait betweens requests, limit TCP but in the end nothing worked correctly with async/aiohttp ...
I'm working on a old raspberry pi if it's useful.
here's my code :
async def get_price_as():
async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False, limit=8)) as session:
req_url = "https://apiv5.paraswap.io/prices/"
param = {}
param["srcDecimals"] = str(18)
param["destDecimals"] = str(18)
param["amount"] = str(10**18)
for res in os.listdir(wu.chemin_cles):
W3[res] = Web3(Web3.HTTPProvider(wu.url_provider[res]["rpc"]))
clef_privee[res] = wu.cle_privee(res)
param["network"] = wu.chain_ID(res)
for a in adresses[res]:
for b in adresses[res]:
if b!=a:
param["srcToken"] = a
param["destToken"] = b
async with session.get(req_url,params=param) as resp:
response = await resp.json(content_type=None)
if response.get('priceRoute') == None:
print(res, " ", a, "/", b, " : ", response['error'])
c_1 += 1
r = paraswap.get_price(adresses[res][a], "18", adresses[res][b], "18", wu.chain_ID(res), str(10**18))
if r!=1:
c_2+=1
print(float(r["priceRoute"]["destAmount"])/10**18, "sync ...")
else :
response = float(response["priceRoute"]["destAmount"])/10**18
print(res, " ", a, "/", b, " : ", response)
asyncio.run(get_price_as())
# paraswap
def get_price(srcToken, srcDecimals, destToken, destDecimals, chainID, amount):
req_url = "https://apiv5.paraswap.io/prices/"
param = {}
param["srcToken"] = srcToken
param["srcDecimals"] = srcDecimals
param["destToken"] = destToken
param["destDecimals"] = destDecimals
param["network"] = chainID
param["amount"] = amount
response = requests.get(req_url,params=param)
if response.status_code == 200:
return response.json()
else:
return 1
it's not pretty yet but I just want something that work for now ... thx

Related

Python missing 1 required keyword-only argument [duplicate]

This question already has answers here:
Client.__init__() missing 1 required keyword-only argument: 'intents'
(4 answers)
Closed 6 months ago.
I have a Discord Bot for UKHotDeals, but it throws an error.
This is written for Python 3.x.
The original repository can be find in here: https://github.com/davidteather/Hotukdeals-Discord-Notifier
Traceback (most recent call last):
File "C:\Users\USER\Desktop\Hotukdeals-Discord-Notifier-master\main.py", line 179, in <module>
client = MyClient(channel_id)
File "C:\Users\USER\Desktop\Hotukdeals-Discord-Notifier-master\main.py", line 31, in __init__
super().__init__(*args, **kwargs)
TypeError: Client.__init__() missing 1 required keyword-only argument: 'intents'
I can't get where I'm missing something in the code, which is this:
import discord
import asyncio
import requests
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import json
with open('settings.json') as data:
settings = json.load(data)
min_upvotes = int(settings["min_upvotes"])
max_upvotes = int(settings["max_upvotes"])
base_url = settings["base_url"]
pages_to_index = int(settings["pages_to_index"])
discord_api_key = settings["discord_api_token"]
min_price = float(settings["min_price"])
max_price = float(settings["max_price"])
channel_id = int(settings["discord_channel_id"])
time_interval_seconds = int(settings["time_interval_seconds"])
class MyClient(discord.Client):
def __init__(self, channel, *args, **kwargs):
self.outOfStock = []
self.checkUrls = []
self.channelID = channel
super().__init__(*args, **kwargs)
# create the background task and run it in the background
self.bg_task = self.loop.create_task(self.my_background_task())
# Check deals
def checkDealsBeautifulSoup(self, url):
# Imports
import requests
from bs4 import BeautifulSoup
import json
import random
# Loads JSON and vars
with open('settings.json') as data:
settings = json.load(data)
min_upvotes = int(settings["min_upvotes"])
max_upvotes = int(settings["max_upvotes"])
min_price = float(settings["min_price"])
max_price = float(settings["max_price"])
# Loads proxies
with open('proxies.txt', 'r') as proxies:
proxies = proxies.readlines()
# Picks random proxy
proxy = random.choice(proxies)
returnMsgs = []
newArray = []
# Reads already used things
with open('data/usedLinks.txt', 'r') as data:
usedArray = data.readlines()
# Sets up proxy
proxies = {
"http": "http://" + proxy,
"https": "https://" + proxy,
}
page = requests.get(url, proxies=proxy)
soup = BeautifulSoup(page.text, 'html.parser')
var = False
# Tries to get things
try:
listings = soup.find_all(
'article', attrs={'data-handler': 'history'})
upvotes = soup.find_all('span', attrs={'class': 'cept-vote-temp'})
pricing = soup.find_all('span', attrs={'class': 'thread-price'})
urls = soup.find_all(
'a', attrs={'class': 'cept-thread-image-link'})
var = True
except:
var = False
if var == True:
upvotesIndex = 0
index = 0
for x in range(0, len(listings)):
try:
upvote = upvotes[upvotesIndex].text.strip().replace(
" ", "").replace("°", "").replace("\n", "")
if "Deal" in upvote or "alerts" in upvote:
upvotesIndex += 1
upvote = upvotes[upvotesIndex].text.strip().replace(
" ", "").replace("°", "").replace("\n", "")
except:
upvote = 0
try:
price = pricing[index].text.strip().replace("£", "")
except:
price = 0
try:
url = urls[index].get('href')
except:
url = None
if price != "FREE":
try:
price = float(price.replace(",", ""))
except:
price = 0
else:
price = 0
if min_price <= price <= max_price:
if min_upvotes <= int(upvote) <= max_upvotes:
if url != None:
if url + "\n" not in usedArray:
# Return Message
message = url + " Satisfies your deal criteria. It is at " + \
str(upvote) + \
" degrees and costs £" + str(price)
returnMsgs.append(message)
usedArray.append(url)
newArray.append(url)
upvotesIndex += 1
index += 1
# Saves new logged files
with open('data/usedLinks.txt', 'a') as fileObj:
for line in newArray:
fileObj.write(line + "\n")
# Returns stuff
return returnMsgs
# On start
async def on_ready(self):
print('Logged in as')
print(self.user.name)
print(self.user.id)
print('------')
# On message
async def on_message(self, message):
if message.author.id == self.user.id:
return
# Background manager
async def my_background_task(self):
await self.wait_until_ready()
channel = self.get_channel(int(channel_id))
while not self.is_closed():
for page in range(0, int(pages_to_index)):
print('checking page ' + str(page))
res = self.checkDealsBeautifulSoup(
base_url + "?page=" + str(page))
if res != []:
for msg in res:
await channel.send(msg)
await asyncio.sleep(int(time_interval_seconds))
# Main
client = MyClient(channel_id)
client.run(discord_api_key)
channel_id and discord_api_key correctly set in settings.json like this:
{
"min_upvotes": "500",
"max_upvotes": "1000",
"base_url": "https://www.hotukdeals.com",
"pages_to_index": "10",
"discord_api_token": "asdAxNasdDkxNzQ1NDcasdasd4ODU1OTAxOQ.GxasdNr.Hasdv7k9Iladsdvasd67jasdasdCXHF4",
"min_price": "0",
"max_price": "500",
"discord_channel_id": "5712311231233167",
"time_interval_seconds": "1800"
}
Looking at this other thread, the discord client now uses an Intent object in its constructor
client = discord.Client(intents=discord.Intents.default())
For you, you would have to fix the following call when instantiating your own instance
It seems to be that the error comes from discord.Client,
here:
class MyClient(discord.Client):
def __init__(self, channel, *args, **kwargs):
self.outOfStock = []
self.checkUrls = []
self.channelID = channel
super().__init__(*args, **kwargs)
check this:
https://discordpy.readthedocs.io/en/stable/api.html#discord.Client.intents
https://discordpy.readthedocs.io/en/stable/api.html#discord.Intents

How to make a loop for API calls Constantly?

I am wondering how do u make a loop for an api call that will keep calling that API, but when I tried making one it didn't work here is the code:
while True:
api_requesting = requests.get("https://api.battlemetrics.com/servers/3411152?include=player", headers=headers)
time.sleep(5)
jsoned_api = api_requesting.json()
function = jsoned_api["included"]
names = []
for person in function:
names.append(person['attributes']['name'])
And this is for e to call upon the request, and parsed it to give me the names of each player etc
#client.command(name='players')
async def createEmbed(ctx):
await ctx.send(f"{len(names)} players are online currently")
urString = ""
for name in names:
urString = urString + "> " + name + "\n"
urString = "```" + urString + "```"
await ctx.send(urString)
So I am wondering how will I make a loop for my request it's all the way at the beginning where it says while true: but when I run it the bot doesn't respond, and doesn't do anything.
If you want your code to stop when the bot does not respond:
success = True
while success:
api_requesting = requests.get("https://api.battlemetrics.com/servers/3411152?include=player", headers=headers)
# Success is True when the response status code is 200
success = api_requesting.status_code==200
But if you want to keep making requests, you can try:
while True:
api_requesting = requests.get("https://api.battlemetrics.com/servers/3411152?include=player", headers=headers)
if api_requesting.status_code == 200:
# Do something when the bot responds
else:
time.sleep(5)
# Do something else when the bot does not respond

Search haveibeenpwned for all emails on a domain

I am able to use haveibeenpwned to search for 1 account compromise. However, I could not find an option to use the API key to search for compromise of all the email accounts on a domain. (For example. if the domain is xyz.com, I want to search for the compromise of abc#xyz.com, peter.charlie#xyz.com and so on). I am aware of the notification email that I can sign up for. But, that is a lengthy process and I prefer using the API.
So, I wrote a script to search against haveibeenpwned for all the email address of my domain, but it takes very long. I searched through a couple of Github projects, but I did not find any such implementation. Has anyone tried this before?
I have added the code below. I am using Multi threading approach, but still it takes very long, is there any other Optimization strategy I can use? Please help. Thank you.
import requests, json
import threading
from time import sleep
import datetime
import splunklib.client as client
import splunklib.results as results
date = datetime.datetime.now()
from itertools import islice
import linecache
import sys
def PrintException():
exc_type, exc_obj, tb = sys.exc_info()
f = tb.tb_frame
lineno = tb.tb_lineno
filename = f.f_code.co_filename
linecache.checkcache(filename)
line = linecache.getline(filename, lineno, f.f_globals)
print 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)
class myThread (threading.Thread):
def __init__(self, threadID, name, list_emails):
threading.Thread.__init__(self)
self.threadID = threadID
self.name = name
self.list_emails = list_emails
def run(self):
i=0
print "Starting " + self.name
for email in self.list_emails:
print i
i=i+1
result = check_pasteaccount(email)
print email
print result
print result
print "Exiting " + self.name
def check_pasteaccount(account):
account = str(account)
result = ""
URL = "https://haveibeenpwned.com/api/v3/pasteaccount/%s?truncateResponse=false" % (account)
# print(URL)
headers= {'hibp-api-key':api_key}
result = ""
try:
r = requests.get(url=URL,headers=headers)
# sleep(2)
status_code = r.status_code
if status_code == 200:
data = r.text
result = []
for entry in json.loads(data.decode('utf8')):
if int((date - datetime.datetime.strptime(entry['Date'], '%Y-%m-%dT%H:%M:%SZ')).days) > 120:
pass
else:
result.append(['Title: {0}'.format(entry['Title']), \
'Source: {0}'.format(['Source']), \
'Paste ID: {0}'.format(entry['Id'])])
if len(result) == 0:
result = "No paste reported for given account and time frame."
else:
paste_result = ""
for entry in result:
for item in entry:
paste_result += str(item) + "\r\n"
paste_result += "\r\n"
result = paste_result
elif status_code == 404:
result = "No paste for the account"
else:
if status_code == 429:
sleep(5)
# print "Limit exceeded, sleeping"
result = check_pasteaccount(account)
else:
result = "Exception"
print status_code
except Exception as e:
result = "Exception"
PrintException()
pass
return result
def split_every(n, iterable):
iterable = iter(iterable)
for chunk in iter(lambda: list(islice(iterable, n)), []):
yield chunk
def main():
print datetime.datetime.now()
# Fetching the list of email addresses from Splunk
list_emails = connect_splunk()
print datetime.datetime.now()
i=0
list_split = split_every(1000,list_emails)
threads=[]
for list in list_split:
i=i+1
thread_name = "Thread" + str(i)
thread = myThread(1, thread_name, list)
thread.start()
threads.append(thread)
# Wait for all the threads to complete
for t in threads:
t.join()
print "Completed Search"
Here's a shorter and maybe more efficient version of your script using the standard multiprocessing library instead of a hand-rolled thread system.
You'll need Python 3.6+ since we're using f-strings.
You'll need to install the tqdm module for fancy progress bars.
You can adjust the number of concurrent requests with the pool size parameter.
Output is written in machine-readable JSON Lines format into a timestamped file.
A single requests session is shared (per-worker), which means less time spent connecting to HIBP.
import datetime
import json
import multiprocessing
import random
import time
import requests
import tqdm
HIBP_PARAMS = {
"truncateResponse": "false",
}
HIBP_HEADERS = {
"hibp-api-key": "xxx",
}
sess = requests.Session()
def check_pasteaccount(account):
while True:
resp = sess.get(
url=f"https://haveibeenpwned.com/api/v3/pasteaccount/{account}",
params=HIBP_PARAMS,
headers=HIBP_HEADERS,
)
if resp.status_code == 429:
print("Quota exceeded, waiting for a while")
time.sleep(random.uniform(3, 7))
continue
if resp.status_code >= 400:
return {
"account": account,
"status": resp.status_code,
"result": resp.text,
}
return {
"account": account,
"status": resp.status_code,
"result": resp.json(),
}
def connect_splunk():
# TODO: return emails
return []
def main():
list_emails = [str(account) for account in connect_splunk()]
datestamp = datetime.datetime.now().isoformat().replace(":", "-")
output_filename = f"accounts-log-{datestamp}.jsonl"
print(f"Accounts to look up: {len(list_emails)}")
print(f"Output filename: {output_filename}")
with multiprocessing.Pool(processes=16) as p:
with open(output_filename, "a") as f:
results_iterable = p.imap_unordered(
check_pasteaccount, list_emails, chunksize=20
)
for result in tqdm.tqdm(
results_iterable,
total=len(list_emails),
unit="acc",
unit_scale=True,
):
print(json.dumps(result, sort_keys=True), file=f)
if __name__ == "__main__":
main()

How to make my program asynchronous with asyncio?

Here is my code:
import urllib
import webbrowser
from bs4 import BeautifulSoup
import requests
import re
address = 'https://google.com/search?q='
# Default Google search address start
file = open( "OCR.txt", "rt" )
# Open text document that contains the question
word = file.read()
file.close()
myList = [item for item in word.split('\n')]
newString = ' '.join(myList)
# The question is on multiple lines so this joins them together with proper spacing
qstr = urllib.parse.quote_plus(newString)
# Encode the string
newWord = address + qstr
# Combine the base and the encoded query
response = requests.get(newWord)
#with open('output.html', 'wb') as f:
# f.write(response.content)
#webbrowser.open('output.html')
answers = open("ocr2.txt", "rt")
ansTable = answers.read()
answers.close()
ans = ansTable.splitlines()
ans1 = str(ans[0])
ans2 = str(ans[2])
ans3 = str(ans[4])
ans1Score = 0
ans2Score = 0
ans3Score = 0
links = []
soup = BeautifulSoup(response.text, 'lxml')
for r in soup.find_all(class_='r'):
linkRaw = str(r)
link = re.search("(?P<url>https?://[^\s]+)", linkRaw).group("url")
if '&' in link:
finalLink = link.split('&')
link = str(finalLink[0])
links.append(link)
#print(links)
#print(' ')
for g in soup.find_all(class_='g'):
webBlock = str(g)
ans1Tally = webBlock.count(ans1)
ans2Tally = webBlock.count(ans2)
ans3Tally = webBlock.count(ans3)
if ans1 in webBlock:
ans1Score += ans1Tally
else:
ans1Found = False
if ans2 in webBlock:
ans2Score += ans2Tally
else:
ans2Found = False
if ans3 in webBlock:
ans3Score += ans3Tally
else:
ans3Found = False
if ans1Found and ans2Found and ans3Found is False:
searchLink = str(links[0])
if searchLink.endswith('pdf'):
pass
else:
response2 = requests.get(searchLink)
soup2 = BeautifulSoup(response2.text, 'lxml')
for p in soup2.find_all('p'):
extraBlock = str(p)
extraAns1Tally = extraBlock.count(ans1)
extraAns2tally = extraBlock.count(ans2)
extraAns3Tally = extraBlock.count(ans3)
if ans1 in extraBlock:
ans1Score += extraAns1Tally
if ans2 in extraBlock:
ans2Score += extraAns2Tally
if ans3 in extraBlock:
ans3Score += extraAns3Tally
with open("Results.txt", "w") as results:
results.write(newString + '\n\n')
results.write(ans1+": "+str(ans1Score)+'\n')
results.write(ans2+": "+str(ans2Score)+'\n')
results.write(ans3+": "+str(ans3Score))
links.pop(0)
print(' ')
print('-----')
print(ans1+": "+str(ans1Score))
print(ans2+": "+str(ans2Score))
print(ans3+": "+str(ans3Score))
print('-----')
Basically right now it is scraping each "g" one at a time, when this program can benefit massively from scraping each link all at the same time. For example, I want it to have them all scraping at the same time instead of waiting until the one before it is done. Sorry if this is a simple kind of question but I have little experience with asyncio so if anyone could help that would be massively appreciated. Thanks!
To write async program you need:
define functions with async def
call it with await
create event loop and run some function in it
run requests concurrently using asyncio.gather
All other is almost same as usual. Instead of using blocking request module you should use some async one. For example, aiohttp:
python -m pip install aiohttp
And use it like this:
async def get(url):
async with aiohttp.ClientSession() as session:
async with session.get('https://api.github.com/events') as resp:
return await resp.text()
Here's code with some changes I statrted. I didn't check if it's actually works since I don't have files you use. You should also move logic inside for g in soup.find_all(class_='g'): to seperate function and run multiple of these functions with asyncio.gather to benefit of asyncio.
import asyncio
import aiohttp
import urllib
import webbrowser
from bs4 import BeautifulSoup
import re
async def get(url):
async with aiohttp.ClientSession() as session:
async with session.get('https://api.github.com/events') as resp:
return await resp.text()
async def main():
address = 'https://google.com/search?q='
# Default Google search address start
file = open( "OCR.txt", "rt" )
# Open text document that contains the question
word = file.read()
file.close()
myList = [item for item in word.split('\n')]
newString = ' '.join(myList)
# The question is on multiple lines so this joins them together with proper spacing
qstr = urllib.parse.quote_plus(newString)
# Encode the string
newWord = address + qstr
# Combine the base and the encoded query
text = await get(newWord)
#with open('output.html', 'wb') as f:
# f.write(response.content)
#webbrowser.open('output.html')
answers = open("ocr2.txt", "rt")
ansTable = answers.read()
answers.close()
ans = ansTable.splitlines()
ans1 = str(ans[0])
ans2 = str(ans[2])
ans3 = str(ans[4])
ans1Score = 0
ans2Score = 0
ans3Score = 0
links = []
soup = BeautifulSoup(text, 'lxml')
for r in soup.find_all(class_='r'):
linkRaw = str(r)
link = re.search("(?P<url>https?://[^\s]+)", linkRaw).group("url")
if '&' in link:
finalLink = link.split('&')
link = str(finalLink[0])
links.append(link)
#print(links)
#print(' ')
for g in soup.find_all(class_='g'):
webBlock = str(g)
ans1Tally = webBlock.count(ans1)
ans2Tally = webBlock.count(ans2)
ans3Tally = webBlock.count(ans3)
if ans1 in webBlock:
ans1Score += ans1Tally
else:
ans1Found = False
if ans2 in webBlock:
ans2Score += ans2Tally
else:
ans2Found = False
if ans3 in webBlock:
ans3Score += ans3Tally
else:
ans3Found = False
if ans1Found and ans2Found and ans3Found is False:
searchLink = str(links[0])
if searchLink.endswith('pdf'):
pass
else:
text2 = await get(searchLink)
soup2 = BeautifulSoup(text2, 'lxml')
for p in soup2.find_all('p'):
extraBlock = str(p)
extraAns1Tally = extraBlock.count(ans1)
extraAns2tally = extraBlock.count(ans2)
extraAns3Tally = extraBlock.count(ans3)
if ans1 in extraBlock:
ans1Score += extraAns1Tally
if ans2 in extraBlock:
ans2Score += extraAns2Tally
if ans3 in extraBlock:
ans3Score += extraAns3Tally
with open("Results.txt", "w") as results:
results.write(newString + '\n\n')
results.write(ans1+": "+str(ans1Score)+'\n')
results.write(ans2+": "+str(ans2Score)+'\n')
results.write(ans3+": "+str(ans3Score))
links.pop(0)
print(' ')
print('-----')
print(ans1+": "+str(ans1Score))
print(ans2+": "+str(ans2Score))
print(ans3+": "+str(ans3Score))
print('-----')
if __name__ == '__main__':
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(main())
finally:
loop.run_until_complete(loop.shutdown_asyncgens())
loop.close()
Upd:
Main idea is to move logic inside loop that does request into separate coroutine and pass multiple of these coroutines to asyncio.gather. It will parallelize your requests.
async def main():
# Her do all that are before the loop.
coros = [
process_single_g(g)
for g
in soup.find_all(class_='g')
]
results = await asyncio.gather(*coros) # this function will run multiple tasks concurrently
# and return all results together.
for res in results:
ans1Score, ans2Score, ans3Score = res
print(' ')
print('-----')
print(ans1+": "+str(ans1Score))
print(ans2+": "+str(ans2Score))
print(ans3+": "+str(ans3Score))
print('-----')
async def process_single_g(g):
# Here do all things you inside loop for concrete g.
text2 = await get(searchLink)
# ...
return ans1Score, ans2Score, ans3Score

Get all comments from a specific reddit thread in python

The official way,
r = praw.Reddit('Comment Scraper 1.0 by u/_Daimon_ see '
'https://praw.readthedocs.org/en/latest/'
'pages/comment_parsing.html')
submission = r.get_submission(submission_id='11v36o')
submission.replace_more_comments(limit=None, threshold=0)
is extremely slow. Is there a way to speed this up? There are people that have extracted every reddit comment into a database, so there must be some way to do this quicker.
Edit: the new praw api (6.0.0) has lists() which make the job easier:
This also handles AttributeError that might occure due to more_comments through the use of replace_more(limit=None)
submissionList = []
submission.comments.replace_more(limit=None)
for comment in submission.comments.list():
submissionList.append(comment)
Edit: The new praw api (5.0.1) is magical and makes this much easier. Here is how to do it now:
def getSubComments(comment, allComments, verbose=True):
allComments.append(comment)
if not hasattr(comment, "replies"):
replies = comment.comments()
if verbose: print("fetching (" + str(len(allComments)) + " comments fetched total)")
else:
replies = comment.replies
for child in replies:
getSubComments(child, allComments, verbose=verbose)
def getAll(r, submissionId, verbose=True):
submission = r.submission(submissionId)
comments = submission.comments
commentsList = []
for comment in comments:
getSubComments(comment, commentsList, verbose=verbose)
return commentsList
Example usage:
res = getAll(r, "6rjwo1")
#res = getAll(r, "6rjwo1", verbose=False) # This won't print out progress if you want it to be silent. Default is verbose=True
Where r is
username = 'myusernamehere'
userAgent = "MyAppName/0.1 by " + username
clientId = 'myClientId'
clientSecret = "myClientSecret"
password = "passwordformyusernamehere"
r = praw.Reddit(user_agent=userAgent, client_id=clientId, client_secret=clientSecret)
Previous stuff (outdated now):
Okay, I wrote code that can reliably pull every comment from a thread, and takes about 10 seconds for 500 comments, and about a minute for 4000 comments. I named this redApi.py Here it is:
import time
import requests
import requests.auth
import praw
username = 'myusernamehere'
userAgent = "MyAppName/0.1 by " + username
clientId = 'myClientId'
clientSecret = "myClientSecret"
password = "passwordformyusernamehere"
def getPraw():
return praw.Reddit(user_agent=userAgent, client_id=clientId, client_secret=clientSecret)
global accessToken
accessToken = None
def getAccessToken():
client_auth = requests.auth.HTTPBasicAuth(clientId, clientSecret)
post_data = {"grant_type": "password", "username": username, "password": password}
headers = {"User-Agent": userAgent}
response = requests.post("https://www.reddit.com/api/v1/access_token", auth=client_auth, data=post_data, headers=headers)
return response.json()
def makeRequest(apiUrl, useGet=True):
global accessToken
if accessToken is None:
accessToken = getAccessToken()
headers = {"Authorization": "bearer " + accessToken['access_token'], "User-Agent": userAgent}
if useGet:
response = requests.get(apiUrl, headers=headers)
else:
response = requests.post(apiUrl, headers=headers)
time.sleep(1.1)
responseJson = response.json()
if 'error' in responseJson:
if responseJson['error'] == 401:
print "Refreshing access token"
time.sleep(1.1)
accessToken = getAccessToken()
headers = {"Authorization": "bearer " + accessToken['access_token'], "User-Agent": userAgent}
time.sleep(1.1)
response = requests.get(apiUrl, headers=headers)
responseJson = response.json()
return responseJson
global prawReddit
prawReddit = praw.Reddit(user_agent=userAgent, client_id=clientId, client_secret=clientSecret)
# Gets any number of posts
def getPosts(subredditName, numPosts=1000):
global prawReddit
subreddit = prawReddit.get_subreddit(subredditName)
postGetter = praw.helpers.submissions_between(prawReddit, subreddit)
postArray = []
numGotten = 0
while numGotten < numPosts:
postArray.append(postGetter.next())
numGotten += 1
return postArray
# Get all comments from a post
# Submission is a praw submission, obtained via:
# r = redApi.getPraw()
# submission = r.get_submission(submission_id='2zysz7') # (or some other submission id, found via https://www.reddit.com/r/test/comments/2zysz7/ayy/ - the thing after /comments/)
# comments = redApi.getComments(submission)
def getComments(submission):
requestUrl = 'https://oauth.reddit.com/' + submission.subreddit.url + 'comments/article?&limit=1000&showmore=true&article=' + submission.id
allData = makeRequest(requestUrl)
articleData = allData[0]
comments = allData[1]
curComments = comments['data']['children']
resultComments = getCommentsHelper(curComments, submission.name, submission)
return resultComments
# Print out the tree of comments
def printTree(comments):
return printTreeHelper(comments, "")
def printTreeHelper(comments, curIndentation):
resultString = ""
for comment in comments:
resultString += curIndentation + comment['data']['body'].replace("\n", "\n" + curIndentation) + "\n"
if not comment['data']['replies'] == "":
resultString += printTreeHelper(comment['data']['replies']['data']['children'], curIndentation + " ")
return resultString
# Get all comments as a single array
def flattenTree(comments):
allComments = []
for comment in comments:
allComments.append(comment)
if not comment['data']['replies'] == "":
allComments += flattenTree(comment['data']['replies']['data']['children'])
return allComments
# Utility functions for getComments
def expandCommentList(commentList, submission):
curComments = commentList
allComments = {}
while True:
thingsToExpand = []
nextComments = []
allParents = {}
for comment in curComments:
if comment['kind'] == "more":
thingsToExpand += comment['data']['children']
else:
if comment['data']['body'][:len("If they are shipping")] == "If they are shipping":
print comment
allComments[comment['data']['name']] = comment
if len(thingsToExpand) == 0:
curComments = []
break
curComments = []
if not len(thingsToExpand) == 0:
print "total things to expand: " + str(len(thingsToExpand))
for i in range(0, len(thingsToExpand)/100+1):
curCommentIds = thingsToExpand[i*100:min((i+1)*100, len(thingsToExpand))]
requestUrl = 'https://oauth.reddit.com/api/morechildren.json?api_type=json&link_id=' + submission.name + '&limit=1000&showmore=true&children=' + ",".join(curCommentIds)
curData = makeRequest(requestUrl)
if 'json' in curData and 'data' in curData['json']:
curComments += curData['json']['data']['things']
print (i+1)*100
for comment in curComments:
allComments[comment['data']['name']] = comment
return allComments.values()
def lookForMore(comment):
if comment['kind'] == "more":
return True
if not comment['data']['replies'] == "":
for reply in comment['data']['replies']['data']['children']:
if lookForMore(reply):
return True
return False
def getCommentsHelper(curComments, rootId, submission):
allComments = expandCommentList(curComments, submission)
commentMap = {}
for comment in allComments:
commentMap[comment['data']['name']] = comment
allRootComments = []
for comment in allComments:
if comment['data']['parent_id'] == rootId:
allRootComments.append(comment)
elif comment['data']['parent_id'] in commentMap:
parentComment = commentMap[comment['data']['parent_id']]
if parentComment['data']['replies'] == "":
parentComment['data']['replies'] = {'data': {'children': []}}
alreadyChild = False
for childComment in parentComment['data']['replies']['data']['children']:
if childComment['data']['name'] == comment['data']['name']:
alreadyChild = True
break
if not alreadyChild:
parentComment['data']['replies']['data']['children'].append(comment)
else:
print "pls halp"
completedComments = []
needMoreComments = []
for comment in allRootComments:
if not comment['data']['replies'] == "" or comment['kind'] == 'more':
hasMore = lookForMore(comment)
if hasMore:
needMoreComments.append(comment)
else:
replyComments = getCommentsHelper(comment['data']['replies']['data']['children'], comment['data']['name'], submission)
comment['data']['replies']['data']['children'] = replyComments
completedComments.append(comment)
else:
completedComments.append(comment)
for comment in needMoreComments:
requestUrl = 'https://oauth.reddit.com/' + submission.subreddit.url + 'comments/article?&limit=1000&showmore=true&article=' + submission.id + "&comment=" + comment['data']['id']
allData = makeRequest(requestUrl)
articleData = allData[0]
comment = allData[1]['data']['children'][0]
if comment['data']['replies'] == "":
completedComments.append(comment)
else:
comments = comment['data']['replies']['data']['children']
actualComments = getCommentsHelper(comments, comment['data']['name'], submission)
comment['data']['replies']['data']['children'] = actualComments
completedComments.append(comment)
return completedComments
To use this script, in a python prompt, type the following:
# Get all comments from a post
# Submission is a praw submission, obtained via:
r = redApi.getPraw()
submission = r.get_submission(submission_id='2zysz7') # (or some other submission id, found via https://www.reddit.com/r/test/comments/2zysz7/ayy/ - the thing after /comments/)
comments = redApi.getComments(submission)
Looks like praw has been updated? In 4.5.1 it looks more like:
#!/usr/local/bin/python
import praw
reddit = praw.Reddit(
client_id='<client_id>',
client_secret='<client_secret>',
user_agent='davehodg/0.1')
submission = reddit.submission(id='<submission_id>')
sumbission = reddit.submission(url='<submission_url>') #in case you don't have submission id
for comment in submission.comments.list():
print(comment.body)
Edit: seems like the most I can get back is 1000 comments?
Im adding a bunch of prints and debugging, but right now #danielle your script does nothing. Just returned back to prompt.

Categories