I am trying to use Reddit's developer API to build a simple scraper that grabs posts and their replies in a target subreddit and produces JSON with the information.
I am getting a 404 error that I don't understand.
This is my code:
import praw
import json
def scrape(subreddit, limit):
r = praw.Reddit(user_agent='Reddit data organizer 1.0 by /u/reallymemorable', client_id='none of your business', client_secret='none of your business')
submissions = r.subreddit(subreddit).get_hot(limit=limit)
for submission in submissions:
data = {}
data['title'] = submission.title
data['score'] = submission.score
data['url'] = submission.url
data['author'] = str(submission.author)
data['subreddit'] = str(submission.subreddit)
data['num_comments'] = submission.num_comments
data['over_18'] = submission.over_18
data['selftext'] = submission.selftext
data['is_self'] = submission.is_self
data['name'] = submission.name
data['created_utc'] = submission.created_utc
data['permalink'] = submission.permalink
data['domain'] = submission.domain
data['id'] = submission.id
data['kind'] = submission.kind
json.dumps(data)
scrape('https://www.reddit.com/r/funny/', 25)
When I run it, I get this:
reallymemorable#Christians-MBP Desktop % python3 fetch-data-subreddit.py
Traceback (most recent call last):
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 26, in <module>
scrape('https://www.reddit.com/r/augmentedreality/comments/yv7sn8/ar_maximum_distance/', 25)
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 6, in scrape
submissions = r.subreddit(subreddit).get_hot(limit=limit)
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/base.py", line 34, in __getattr__
self._fetch()
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 583, in _fetch
data = self._fetch_data()
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 580, in _fetch_data
return self._reddit.request(method="GET", params=params, path=path)
File "/opt/homebrew/lib/python3.9/site-packages/praw/util/deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "/opt/homebrew/lib/python3.9/site-packages/praw/reddit.py", line 941, in request
return self._core.request(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 330, in request
return self._request_with_retries(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 266, in _request_with_retries
raise self.STATUS_EXCEPTIONS[response.status_code](response)
prawcore.exceptions.NotFound: received 404 HTTP response
r.subreddit(subreddit) - subreddit should just be the name of the subreddit e.g. "funny" and not the full URL.
See the docs here: https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#obtain-a-subreddit
What should I do? I'm getting this error. I want add some tags for FLAC.
I searched but i didnt find anythings. Please help me.
Traceback (most recent call last):
File "indir.py", line 50, in <module>
audio.save()
File "/usr/local/lib/python3.6/dist-packages/mutagen/_util.py", line 169, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/mutagen/_util.py", line 140, in wrapper
return func(self, h, *args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/mutagen/flac.py", line 847, in save
self._save(filething, self.metadata_blocks, deleteid3, padding)
File "/usr/local/lib/python3.6/dist-packages/mutagen/flac.py", line 864, in _save
metadata_blocks, available, content_size, padding)
File "/usr/local/lib/python3.6/dist-packages/mutagen/flac.py", line 154, in _writeblocks
data += cls._writeblock(block)
File "/usr/local/lib/python3.6/dist-packages/mutagen/flac.py", line 126, in _writeblock
datum = block.write()
File "/usr/local/lib/python3.6/dist-packages/mutagen/flac.py", line 620, in write
f.write(self.data)
TypeError: a bytes-like object is required, not 'str'
My Code:
audio = FLAC("music.flac")
audio['artist'] = sarki.artist.name
audio['title'] = sarki.name
pic = Picture()
pic.type = id3.PictureType.COVER_FRONT
pic.width = 640
pic.height = 640
pic.mime = 'image/jpeg'
pic.data = "music.jpg"
audio.add_picture(pic)
audio.save()
I believe the error is here:
pic.data = "music.jpg"
You are attempting to set the image data of the picture to be a string. I'm guessing you wanted to set the image data to be the contents of the file music.jpg instead. If so, try replacing this line with the following two:
with open("music.jpg", "rb") as f:
pic.data = f.read()
This follows an example in the Mutagen API reference.
os : windows 10 64bits
python: 3.7.3(anaconda)
# -*- coding: utf-8 -*-
from aip import AipSpeech
APP_ID = ''
API_KEY = 'xxx'
SECRET_KEY = 'yyy'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
def get_file_content(filePath):
with open(filePath, 'rb') as fp:
return fp.read()
# call "client.asr(get_file_content('01.wav'), 'wav', 16000)" will throw exception
results = client.asr(get_file_content('01.wav'), 'wav', 16000)
print(results)
Error messages
"""
Traceback (most recent call last):
File "baidu_speech_reg_api.py", line 18, in <module>
results = client.asr(get_file_content('01.wav'), 'wav', 16000)
File "C:\Users\yyyy\Anaconda3\envs\pyside2\lib\site-packages\aip\speech.py", line 78, in asr
return self._request(self.__asrUrl, data)
File "C:\Users\yyyy\Anaconda3\envs\pyside2\lib\site-packages\aip\base.py", line 90, in _request
params = self._getParams(authObj)
File "C:\Users\yyyy\Anaconda3\envs\pyside2\lib\site-packages\aip\base.py", line 190, in _getParams
params['access_token'] = authObj['access_token']
KeyError: 'access_token'
"""
I can use the same keys for ocr and image classification, but speech recognition always fail.
I am not sure what the issue is for this. Is it an issue with the credentials? I am trying to insert data from GCP to a Google BigQuery. Here is the full error:
Traceback (most recent call last):
File "target.py", line 98, in <module>
main()
File "target.py", line 94, in main
insert_data(gcs_file)
File "target.py", line 85, in insert_data
bq = BigQueryClient(project)
File "/Users/xxx/Prog/emr-etl/xx_auth.py", line 58, in BigQueryClient
credentials = Credentials.from_service_account_file(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/google/oauth2/service_account.py", line 209, in from_service_account_file
filename, require=['client_email', 'token_uri'])
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/google/auth/_service_account_info.py", line 71, in from_filename
with io.open(filename, 'r', encoding='utf-8') as json_file:
TypeError: expected str, bytes or os.PathLike object, not NoneType
And here is the code:
def upload_files(files, gcs_bucket="tracker"):
storage_client = storage.Client(project='xxx-main')
bucket = storage_client.get_bucket("tracker")
for file in files:
destination_filepath = file['folder'] + '/' + file['filename']
source_filepath = file['local_filename']
gcs_file = bucket.blob(destination_filepath)
gcs_file.upload_from_filename(source_filepath)
return gcs_file
def insert_data(gcs_file, project="xxx-main"):
bq = BigQueryClient(project)
bq_job_config = QueryJobConfig()
job = bq.load_table_from_uri(gcs_file, 'snowplow', job_config=bq_job_config)
result = job.result()
def main():
lists = list_download(sp_bucket)
gcs_file = upload_files(lists)
insert_data(gcs_file)
if __name__ == "__main__":
main()
I'm writing code for a Django-based static blog, but I am coming across this similar issue across 3 or 4 different areas of my code. I figured if I can get one fixed then I can get the others fixed as well. My code of focus will be a django-command that I call update_blog1. Here's the traceback...
Traceback (most recent call last):
File "C:\Python34\lib\site-packages\django\core\handlers\base.py", line 132, in get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:/Users/Jaysp_000/firstSite/PROJECTone\blog_static\views.py", line 179, in archive
{'posts' : posts}
File "C:/Users/Jaysp_000/firstSite/PROJECTone\blog_static\views.py", line 14, in render_response
return render_to_response(*args, **kwargs)
File "C:\Python34\lib\site-packages\django\shortcuts.py", line 45, in render_to_response using=using)
File "C:\Python34\lib\site-packages\django\template\loader.py", line 116, in render_to_string
template_name, context, context_instance, dirs, dictionary)
File "C:\Python34\lib\site-packages\django\template\engine.py", line 221, in render_to_string
return t.render(context_instance)
File "C:\Python34\lib\site-packages\django\template\base.py", line 208, in render
with context.bind_template(self):
File "C:\Python34\lib\contextlib.py", line 59, in __enter__
return next(self.gen)
File "C:\Python34\lib\site-packages\django\template\context.py", line 235, in bind_template
updates.update(processor(self.request))
File "C:\Python34\lib\site-packages\django\template\context_processors.py", line 56, in i18n
context_extras['LANGUAGE_BIDI'] = translation.get_language_bidi()
File "C:\Python34\lib\site-packages\django\utils\translation\__init__.py", line 177, in get_language_bidi
return _trans.get_language_bidi()
File "C:\Python34\lib\site-packages\django\utils\translation\trans_real.py", line 263, in get_language_bidi
base_lang = get_language().split('-')[0]
AttributeError: 'NoneType' object has no attribute 'split'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Python34\lib\site-packages\django\utils\formats.py", line 103, in get_format
cached = _format_cache[cache_key]
KeyError: ('r', None)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Program Files (x86)\JetBrains\PyCharm 4.5.3\helpers\pycharm\django_manage.py", line 41, in <module>
run_module(manage_file, None, '__main__', True)
File "C:\Python34\lib\runpy.py", line 182, in run_module
return _run_module_code(code, init_globals, run_name, mod_spec)
File "C:\Python34\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "C:\Python34\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:/Users/Jaysp_000/firstSite/PROJECTone\manage.py", line 10, in <module>
execute_from_command_line(sys.argv)
File "C:\Python34\lib\site-packages\django\core\management\__init__.py", line 338, in execute_from_command_line
utility.execute()
File "C:\Python34\lib\site-packages\django\core\management\__init__.py", line 330, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "C:\Python34\lib\site-packages\django\core\management\base.py", line 390, in run_from_argv
self.execute(*args, **cmd_options)
File "C:\Python34\lib\site-packages\django\core\management\base.py", line 441, in execute
output = self.handle(*args, **options)
File "C:/Users/Jaysp_000/firstSite/PROJECTone\blog_static\management\commands\update_blog1.py", line 78, in handle
resp = client.get(path)
File "C:\Python34\lib\site-packages\django\test\client.py", line 500, in get
**extra)
File "C:\Python34\lib\site-packages\django\test\client.py", line 303, in get
return self.generic('GET', path, secure=secure, **r)
File "C:\Python34\lib\site-packages\django\test\client.py", line 379, in generic
return self.request(**r)
File "C:\Python34\lib\site-packages\django\test\client.py", line 448, in request
response = self.handler(environ)
File "C:\Python34\lib\site-packages\django\test\client.py", line 122, in __call__
response = self.get_response(request)
File "C:\Python34\lib\site-packages\django\core\handlers\base.py", line 218, in get_response
response = self.handle_uncaught_exception(request, resolver, sys.exc_info())
File "C:\Python34\lib\site-packages\django\core\handlers\base.py", line 261, in handle_uncaught_exception
return debug.technical_500_response(request, *exc_info)
File "C:\Python34\lib\site-packages\django\views\debug.py", line 97, in technical_500_response
html = reporter.get_traceback_html()
File "C:\Python34\lib\site-packages\django\views\debug.py", line 384, in get_traceback_html
return t.render(c)
File "C:\Python34\lib\site-packages\django\template\base.py", line 209, in render
return self._render(context)
File "C:\Python34\lib\site-packages\django\template\base.py", line 201, in _render
return self.nodelist.render(context)
File "C:\Python34\lib\site-packages\django\template\base.py", line 903, in render
bit = self.render_node(node, context)
File "C:\Python34\lib\site-packages\django\template\debug.py", line 79, in render_node
return node.render(context)
File "C:\Python34\lib\site-packages\django\template\debug.py", line 89, in render
output = self.filter_expression.resolve(context)
File "C:\Python34\lib\site-packages\django\template\base.py", line 674, in resolve
new_obj = func(obj, *arg_vals)
File "C:\Python34\lib\site-packages\django\template\defaultfilters.py", line 771, in date
return formats.date_format(value, arg)
File "C:\Python34\lib\site-packages\django\utils\formats.py", line 136, in date_format
return dateformat.format(value, get_format(format or 'DATE_FORMAT', use_l10n=use_l10n))
File "C:\Python34\lib\site-packages\django\utils\formats.py", line 110, in get_format
for module in get_format_modules(lang):
File "C:\Python34\lib\site-packages\django\utils\formats.py", line 82, in get_format_modules
modules = _format_modules_cache.setdefault(lang, list(iter_format_modules(lang, settings.FORMAT_MODULE_PATH)))
File "C:\Python34\lib\site-packages\django\utils\formats.py", line 51, in iter_format_modules
if not check_for_language(lang):
File "C:\Python34\lib\site-packages\django\utils\translation\__init__.py", line 181, in check_for_language
return _trans.check_for_language(lang_code)
File "C:\Python34\lib\functools.py", line 472, in wrapper
result = user_function(*args, **kwds)
File "C:\Python34\lib\site-packages\django\utils\translation\trans_real.py", line 409, in check_for_language
if not language_code_re.search(lang_code):
TypeError: expected string or buffer
Here's my code for the update_blog1
from django.core.management.base import BaseCommand
from django.core.urlresolvers import reverse
from django.test.client import Client
import sys, os
from optparse import make_option
from P1config.settings import STATICBLOG_POST_DIRECTORY, STATICBLOG_COMPILE_DIRECTORY
class Command(BaseCommand):
help = "Compile blog posts from html to markdown, and upload images to S3 Defaults to processing only new blog posts"
option_list = BaseCommand.option_list + (
make_option(
'--all',
action='store_true',
dest='all',
default=False,
help='Get all blog posts, regardless of date'
),
make_option(
'--name',
action='store',
dest='post_name',
default=False,
help='Get named blog post'
),
)
def handle(self, *args, **options):
verbosity = int(options.get('verbosity'))
client = Client()
outdir = STATICBLOG_COMPILE_DIRECTORY
posts = []
previews = []
if options['all']:
if verbosity > 3:
print ('Compiling all blog posts')
posts = self._get_all_posts()
elif options['post_name']:
posts = self._get_named_posts(options['post_name'])
else:
if verbosity > 3:
print ('Compiling new blog posts')
posts = self._get_all_posts(new = True)
if verbosity > 3:
print ('%d posts found' % len(posts))
print ('----------------------------')
for post in posts:
if verbosity > 3:
print ("Compiling " + post['md_name'] + " to " + post['html_name'])
path = reverse('blog_static.views.archive') + post['path']
# path = '/preview/' + post['path']
resp = client.get(path)
if os.path.exists(outdir + post['path']) == False:
try:
with open(outdir + post['path'], 'r') as f:
pass
except IOError as e:
os.mkdir(outdir + post['path'])
with open(outdir + post['html_name'], 'wb') as f:
f.write(resp.content)
if len(posts) > 0 and verbosity > 3:
print ('----------------------------')
if verbosity > 3:
print ('Updating listings...')
print ('----------------------------')
path = reverse('blog_static.views.archive')
resp = client.get(path)
with open(STATICBLOG_COMPILE_DIRECTORY + 'index.html', 'wb') as f:
f.write(resp.content)
if verbosity > 3:
print ('Done')
def _get_all_posts(self, new = False):
posts = []
for item in os.listdir(STATICBLOG_POST_DIRECTORY):
post = self._create_post(item, new)
if post:
posts.append(post)
return posts
def _get_named_posts(self, post):
post_list = post.split(',')
posts = []
for item in post_list:
try:
with open(STATICBLOG_POST_DIRECTORY + item, 'r') as f:
post = self._create_post(item)
if post:
posts.append(post)
except IOError as e:
print ('\033[01;31m' + str(e) + '\033[0m', file= sys.stderr)
return posts
def _create_post(self, item, new = False):
outdir = STATICBLOG_POST_DIRECTORY
compiled_post = {
'md_name' : item,
'html_name' : '',
'path' : '',
'html' : '',
}
if item.endswith('.md'):
compiled_post['path'] = item.replace('.md', '')
compiled_post['html_name'] = compiled_post['path'] + '/index.html'
if new:
try:
with open(STATICBLOG_COMPILE_DIRECTORY + compiled_post['html_name'], 'r') as f:
return False
except IOError as e:
return compiled_post
else:
return compiled_post
If you look in the code, the methods _get_named_posts(), _create_post(), handle() each have a open() function embedded. Where these open() functions are (ex: open(outdir + post['path'], 'r' or open(STATICBLOG_POST_DIRECTORY + item, 'r')) is where the problem lies, as pointed out by PyCharm.
In my view.py file, I got have this...
# Django imports
from django.template import RequestContext
from django.shortcuts import render_to_response, render
from django.core.files.storage import get_storage_class
from django.core.files.base import ContentFile
from django.views.decorators.csrf import csrf_exempt
from django.db import models
# Create a 'shortcut' function to wrap request in RequestContext()
def render_response(req, *args, **kwargs):
"""Shortcut to wrap request in RequestContext"""
kwargs['context_instance'] = RequestContext(req)
return render_to_response(*args, **kwargs)
# Standard Python lib
import os, sys, urllib, hashlib
# 3rd party apps
import markdown
from markdown.inlinepatterns import ImagePattern, IMAGE_LINK_RE
# from config folder
from P1config.settings import STATICBLOG_COMPILE_DIRECTORY, \
STATICBLOG_POST_DIRECTORY, \
STATICBLOG_STORAGE
###################################################################################
class S3ImagePattern(ImagePattern):
""" Wrapper class to handle image matches in markdown document """
def handleMatch(self, match):
node = ImagePattern.handleMatch(self, match)
# check 'src' to ensure it is local
src = node.attrib.get('src')
storage_class = get_storage_class(STATICBLOG_STORAGE)
storage = storage_class()
# otherwise we need to do some downloading!
if 'http://' in src or 'https://' in src:
img_data = urllib.request.urlopen(src).read()
md5 = hashlib.md5()
md5.update(img_data)
name = md5.hexdigest() + '/' + os.path.basename(src)
else:
with open(STATICBLOG_POST_DIRECTORY + src) as fhandle:
img_data = fhandle.read()
name = src
print('Uploading ' + src, file=sys.stderr)
try:
storage.save(name, ContentFile(img_data))
node.attrib['src'] = storage.url(name)
print ('Uploaded ' + src + ' to ' + storage.url(name), file=sys.stderr)
except Exception as e:
print(str(e), file=sys.stderr)
print ('\033[01;31mUpload of %s failed\033[0m' % src, file=sys.stderr)
return node
def render_post(request, post_name):
""" Render a blog post based on a .post template
The used template is rendered as html in the folder defined
by STATICBLOG_COMPILE_DIRECTORY
"""
content = ""
mdown = markdown.Markdown(extensions = ['meta',])
mdown.inlinePatterns['image_link'] = S3ImagePattern(IMAGE_LINK_RE, mdown)
try:
post_file_dir = os.path.join(STATICBLOG_POST_DIRECTORY, post_name + '.md')
with open(post_file_dir, 'r') as pfDIR:
content = pfDIR.read() # opening and reading the ENTIRE '.md' document
html = mdown.convert(content) # converting file from '.md' to ".html"
except IOError as e:
print (str(e))
with open(os.path.join(STATICBLOG_POST_DIRECTORY, 'preview2.md')) as f:
content = f.read()
html = mdown.convert(content)
post = { 'content' : html, }
try:
post['date'] = mdown.Meta['date'][0]
post['title'] = mdown.Meta['title'][0]
post['author'] = mdown.Meta['author'][0]
post['summary'] = mdown.Meta['summary'][0]
post['tags'] = mdown.Meta['tags'][0]
except:
pass
meta = {}
if 'title' in post:
meta['title'] = post['title']
# Context Object containing the post, meta contexes
context = {'post' : post, 'meta' : meta}
return render_response( # but could I just use render?
request,
'post2.html',
context
)
def archive(request):
mdown = markdown.Markdown(extensions = ['meta',])
# Create an empty post list for now
posts = []
import string
# Look at every 'item' in the STATICBLOG_COMPILE_DIRECTORY
for item in os.listdir(STATICBLOG_COMPILE_DIRECTORY):
# if the 'item' in this directory ends with '.post' (like '.md' in a markdown file, or '.py' in a python file)
# More specifically, if there is a '.post' file located in this directory...
if item.endswith('.md'):
# ...continue on and...
continue
# ...attempt to use that 'item'
try:
# ...by opening it,
with open(os.path.join(STATICBLOG_POST_DIRECTORY, item + '.md')) as fhandle:
# ...reading the markdown file,
content = fhandle.read() # (opening and reading the ENTIRE '.md' document)
# ...and converting it to HTML.
mdown.convert(content) # (converting file from '.md' to ".html")
post = { 'name' : item, }
if 'title' in mdown.Meta and len(mdown.Meta['title'][0]) > 0:
# Add the markdown document's 'title' to post[]
# This stores the post's title from the "Meta" section of the '.md' document
post['title'] = mdown.Meta['title'][0]
# but if that doesnt work...
else:
# Add to the post list the item's Meta 'title', which simply takes \n
# the title of the .md document and removes the '-' from it, so that \n
# we can make it the post's title.
post['title'] = string.capwords(item.replace('-', ' '))
# ...and if there exists a 'date' in the item's meta attribute...
if 'date' in mdown.Meta:
# pass the 'date' info found in the Meta attribute to a 'date' \n
# variable created in the post list
post['date'] = mdown.Meta['date'][0]
posts.append(post)
except:
pass
from operator import itemgetter
posts = sorted(posts, key=itemgetter('date'))
posts.reverse()
return render_response( # but could I just use render?
request,
'archive.html',
{'posts' : posts}
)
#csrf_exempt
def handle_hook(request):
from django.http import HttpResponse
from django.core.management import call_command
result = call_command('update_blog', verbosity = 0)
return HttpResponse(result)
I dont know how to fix this. Can you let me know how to fix these issues? I don't know where I can put a string or buffer in my code.