I am trying to use Reddit's developer API to build a simple scraper that grabs posts and their replies in a target subreddit and produces JSON with the information.
I am getting a 404 error that I don't understand.
This is my code:
import praw
import json
def scrape(subreddit, limit):
r = praw.Reddit(user_agent='Reddit data organizer 1.0 by /u/reallymemorable', client_id='none of your business', client_secret='none of your business')
submissions = r.subreddit(subreddit).get_hot(limit=limit)
for submission in submissions:
data = {}
data['title'] = submission.title
data['score'] = submission.score
data['url'] = submission.url
data['author'] = str(submission.author)
data['subreddit'] = str(submission.subreddit)
data['num_comments'] = submission.num_comments
data['over_18'] = submission.over_18
data['selftext'] = submission.selftext
data['is_self'] = submission.is_self
data['name'] = submission.name
data['created_utc'] = submission.created_utc
data['permalink'] = submission.permalink
data['domain'] = submission.domain
data['id'] = submission.id
data['kind'] = submission.kind
json.dumps(data)
scrape('https://www.reddit.com/r/funny/', 25)
When I run it, I get this:
reallymemorable#Christians-MBP Desktop % python3 fetch-data-subreddit.py
Traceback (most recent call last):
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 26, in <module>
scrape('https://www.reddit.com/r/augmentedreality/comments/yv7sn8/ar_maximum_distance/', 25)
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 6, in scrape
submissions = r.subreddit(subreddit).get_hot(limit=limit)
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/base.py", line 34, in __getattr__
self._fetch()
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 583, in _fetch
data = self._fetch_data()
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 580, in _fetch_data
return self._reddit.request(method="GET", params=params, path=path)
File "/opt/homebrew/lib/python3.9/site-packages/praw/util/deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "/opt/homebrew/lib/python3.9/site-packages/praw/reddit.py", line 941, in request
return self._core.request(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 330, in request
return self._request_with_retries(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 266, in _request_with_retries
raise self.STATUS_EXCEPTIONS[response.status_code](response)
prawcore.exceptions.NotFound: received 404 HTTP response
r.subreddit(subreddit) - subreddit should just be the name of the subreddit e.g. "funny" and not the full URL.
See the docs here: https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#obtain-a-subreddit
Related
I'm trying to make a search with youtube API search method. It worked a few days, but since yesterday it started showming me a Timeout Error
The part of the code:
def YoutubeSearch():
# this is used to build our youtube api
youtube = build('youtube', 'v3', developerKey= 'mykey')
date_tolook = CalculateDate() #Function to calculate the date to look for
request = youtube.search().list(
type = "channel",
part = "snippet",
q = domain_name(ls[15]),
maxResults = 1
)
response = request.execute()
print(response)
duration = CalculateVideoDuration()
if response['pageInfo']['totalResults'] > 0 :
chanid = response['items'][0]['id']['channelId']
request2 = youtube.search().list(
part="snippet",
type = "video",
channelId= chanid,
publishedAfter= date_tolook,
videoDuration=duration,
order = "date"
)
response2 = request2.execute()
print(response2)
if response2['pageInfo']['totalResults'] > 0:
title = response['items'][0]['snippet']['title']
videoid = response['items'][0]['id']['videoId']
request3 = youtube.videos().list(
part="statistics",
id = videoid
)
response3 = request3.execute()
print(response)
numberofviews = response['items'][0]['statistics']['viewCount']
numberoflikes = response['items'][0]['statistics']['likeCount']
SendEmail(ls[15], title, numberofviews, numberoflikes)
CreateCsv(ls[15])
The error:
Traceback (most recent call last):
File "C:\Users\bajan\AppData\Local\Programs\Python\Python310\lib\tkinter\__init__.py", line 1921, in __call__
return self.func(*args)
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\customtkinter\widgets\ctk_button.py", line 372, in clicked
self.command()
File "c:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\main.py", line 70, in doSomething
YoutubeSearch()
File "c:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\main.py", line 119, in YoutubeSearch
response = request.execute()
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\googleapiclient\_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\googleapiclient\http.py", line 923, in execute
resp, content = _retry_request(
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\googleapiclient\http.py", line 191, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\httplib2\__init__.py", line 1701, in request
(response, content) = self._request(
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\httplib2\__init__.py", line 1421, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\httplib2\__init__.py", line 1343, in _conn_request
conn.connect()
File "C:\Users\bajan\Desktop\Informatica\Python\YoutubeSearchBot\lib\site-packages\httplib2\__init__.py", line 1133, in connect
sock.connect((self.host, self.port))
Libraries i use:
from sendgrid.helpers.mail import Mail, Email, To, Content
from sendgrid import SendGridAPIClient
from datetime import datetime, timedelta
from googleapiclient.discovery import build
from tkinter.filedialog import askopenfile
import tkinter
import customtkinter
import pandas as pd
import sendgrid
import os
import re
So i thinks it's something about the port or it might be from youtube server? I have tried everything i could find on the internet but i failed. Please help. Thanks!
I am looking for some advice on how I can upsert or replace existing user entity.
I tried couple of API's documented here and also here.
The entities are read from database and the plan is to keep them in sync with database values as a scheduled job.
Update: Code Snippet
client_options = {"quota_project_id": gcp_default_project_id,
"api_endpoint": "us-central1-dialogflow.googleapis.com:443"}
client = EntityTypesClient(credentials=credentials_det, client_options=client_options)
entity_type = v3beta.EntityType()
entity_type.display_name = entity_display_name
entity_type.kind = "KIND_REGEXP"
print(client_options)
entity_type.entities = entity_json
# Initialize request argument(s)
request = v3beta.UpdateEntityTypeRequest(
entity_type=entity_type,
)
print(request)
response = client.update_entity_type(request=request)
print(response)
entity_json is fetched from DB and created as JSON object as below.
df = get_data.get_df_details(config_dir, entity_data_source, sql)
username = df['username'].tolist()
entity_json = []
for each in username:
each_entity_value = {}
each_entity_value['value'] = each
each_entity_value['synonyms'] = [each]
entity_json.append(each_entity_value)
Here's the Trace
Traceback (most recent call last):
File "/Users/<some_dir>/df_ins_entities/df_ins_entities/ins_entity_val.py", line 116, in
ins_now(config_dir, input_entity_name, entity_data_source)
File "/Users/<some_dir>/df_ins_entities/df_ins_entities/ins_entity_val.py", line 96, in ins_now
response = client.update_entity_type(request=request)
File "/Users/<some_dir>/df_ins_entities/lib/python3.9/site-packages/google/cloud/dialogflowcx_v3beta1/services/entity_types/client.py", line 902, in update_entity_type
response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)
File "/Users/<some_dir>/df_ins_entities/lib/python3.9/site-packages/google/api_core/gapic_v1/method.py", line 154, in call
return wrapped_func(args, **kwargs)
File "/Users/<some_dir>/df_ins_entities/lib/python3.9/site-packages/google/api_core/grpc_helpers.py", line 59, in error_remapped_callable
raise exceptions.from_grpc_error(exc) from exc
google.api_core.exceptions.InvalidArgument: 400 Resource name '' does not match 'projects//locations//agents//entityTypes/*'.
Process finished with exit code 1
I am trying to get the post's text of a megagroup in Telegram using Telethon. I could get the messages from chats however megagroup's posts cannot be retrieved using the same method (How to get channels and groups data from my Telegram account?(Python)). Are megagroups open to fetch their posts by being a simple user using telethon?
Code:
def get_entity_data(entity_id, limit):
entity = client.get_entity(entity_id)
posts = client(GetHistoryRequest(peer=entity, limit=limit, offset_date=None, offset_id=0, max_id=0, min_id=0, add_offset=0, hash=0))
messages = []
for message in posts.messages:
messages.append(message.message)
return messages
result = client(GetDialogsRequest(offset_date=None, offset_id=0, offset_peer=InputPeerEmpty(), limit=100, hash=0)) entities = result.chats entities.reverse()
for entity in entities:
title = entity.title
messages = get_entity_data(entity.id, 10)
print(title + ' :')
print(messages)
print('#######')
and the error message is:
Traceback (most recent call last):
File "./search_message3.py", line 61, in <module>
messages = get_entity_data(entity.id, 10)
File "./search_message3.py", line 48, in get_entity_data
entity = client.get_entity(entity_id)
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/sync.py", line 39, in syncified
return loop.run_until_complete(coro)
File "/home/carlos/.miniconda3/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
return future.result()
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/client/users.py", line 316, in get_entity
chats = (await self(
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/client/users.py", line 30, in __call__
return await self._call(self._sender, request, ordered=ordered)
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/client/users.py", line 84, in _call
result = await future
telethon.errors.rpcerrorlist.PeerIdInvalidError: An invalid Peer was used. Make sure to pass the right peer type and that the value is valid (for instance, bots cannot start conversations) (caused by GetChatsRequest)
Carlos
I am trying to scrape content from a website but I am getting the below mentioned error
The method:
def scrape_newtimes():
"""Scrapes content from the NewTimes"""
url = 'https://www.newtimes.co.rw/'
r = requests.get(url, headers=HEADERS)
tree = fromstring(r.content)
links = tree.xpath('//div[#class="x-small-push clearfix"]/a/#href')
for link in links:
r = requests.get(link, headers=HEADERS)
blog_tree = fromstring(r.content)
paras = blog_tree.xpath('//div[#class="article-content"]/p')
para = extract_paratext(paras)
text = extract_text(para)
if not text:
continue
yield '"%s" %s' % (text, link)
The error I am getting:
>>> sc = scrape_newtimes()
>>> string_1 = next(sc)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "D:\Projects\bird\bird-env\bot.py", line 58, in scrape_newtimes
r = requests.get(link, headers=HEADERS)
File "D:\Projects\bird\venv\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "D:\Projects\bird\venv\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "D:\Projects\bird\venv\lib\site-packages\requests\sessions.py", line 519, in request
prep = self.prepare_request(req)
File "D:\Projects\bird\venv\lib\site-packages\requests\sessions.py", line 462, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks),
File "D:\Projects\bird\venv\lib\site-packages\requests\models.py", line 313, in prepare
self.prepare_url(url, params)
File "D:\Projects\bird\venv\lib\site-packages\requests\models.py", line 387, in prepare_url
raise MissingSchema(error)
requests.exceptions.MissingSchema: Invalid URL '/news/londons-kings-college-launch-civil-service-programme-rwanda': No schema supplied. Perhaps you meant http:///news/londons-kings-college-launch-civil-service-programme-rwanda?
>>>
The exception basically tells you what is wrong:
requests.exceptions.MissingSchema: Invalid URL '/news/londons-kings-college-launch-civil-service-programme-rwanda': No schema supplied. Perhaps you meant http:///news/londons-kings-college-launch-civil-service-programme-rwanda?
Or with line wrapping the line:
Invalid URL '/news/londons-kings-college-launch-civil-service-programme-rwanda':
No schema supplied. Perhaps you meant
http:///news/londons-kings-college-launch-civil-service-programme-rwanda?
You link does not contain a complete URL
I'm trying to deploy a python web server. Main function of following code is to generate a qrcode and transfer the image to binary file.
def generate_qrcode(date, user_id):
qr = qrcode.QRCode(
version=1,
error_correction=constants.ERROR_CORRECT_L,
box_size=10,
border=4,
)
base_string = config_default.configs.get('const').get('url') + '/check_in/'
generate_string = date + '#' +user_id
qr.add_data(base_string + generate_string, qrcode)
qr.make(fit=True)
img = qr.make_image()
return Image.fromqimage(img) # here is the place returns exception
Then post this binary file to a url.
def upload_qrcode(datetime, user_id):
url = "https://api.weixin.qq.com/cgi-bin/media/upload
access_token = get_access_token()
querystring = {"access_token":access_token,"type":"image"}
files = {"media":generate_qrcode(datetime,user_id)}
response = requests.post(url, params=querystring, files=files)
Here is the exception:
Traceback (most recent call last):
File "/Users/yudayan/Documents/myCode/pythonCode/nuobao/QRC_maker.py", line 58, in <module>
main()
File "/Users/yudayan/Documents/myCode/pythonCode/nuobao/QRC_maker.py", line 54, in main
print(upload_qrcode("sdf", "adsf"))
File "/Users/yudayan/Documents/myCode/pythonCode/nuobao/QRC_maker.py", line 47, in upload_qrcode
files = {"media":generate_qrcode(datetime,user_id)}
File "/Users/yudayan/Documents/myCode/pythonCode/nuobao/QRC_maker.py", line 32, in generate_qrcode
return Image.fromqimage(img)
File "/Users/yudayan/anaconda/lib/python3.6/site-packages/PIL/Image.py", line 2321, in fromqimage
return ImageQt.fromqimage(im)
File "/Users/yudayan/anaconda/lib/python3.6/site-packages/PIL/ImageQt.py", line 59, in fromqimage
if im.hasAlphaChannel():
File "/Users/yudayan/anaconda/lib/python3.6/site-packages/qrcode/image/pil.py", line 50, in __getattr__
return getattr(self._img, name)
AttributeError: 'Image' object has no attribute 'hasAlphaChannel'