I am new to using video categories with the YouTube Data API in Python, and I'm running into a problem with the language of the videos that are returned after I select a region to search in.
The problem is that when I enter a region code, the script comments on videos that aren't in English, even though they are meant to be in English.
E.g., when I enter the region code 'US', the outcome is what I have attached: it comments on videos that are in a different language.
result [![enter image description here][1]][1]
I have tried to set region_code="US" directly in the script, but that fails with an error saying 'US' is not defined.
Does anyone know how I can get around this problem or what I'm doing wrong? Thanks
# Placeholder YouTube Data API key read by search_video(); replace "key" with a real key before running.
API_KEY = "key"
# This function loads the comments in Comments.txt file
def load_comments(file):
    """Read the comments from a text file, one comment per line.

    Args:
        file: Path of the UTF-8 encoded text file to read.

    Returns:
        A list with one string per line of the file. Line endings are kept
        exactly as they appear in the file.
    """
    # The context manager closes the handle even when reading fails; the
    # handle used to be left open.
    with open(file, 'r', encoding='utf8') as f:
        return list(f)
def search_video(keyword, region_code, comments):
    """Find freshly published videos to comment on.

    Looks up a video category for ``region_code`` and then polls the YouTube
    search endpoint every 10 seconds until it returns at least one video
    published within the last minute.

    Args:
        keyword: Handed to the category lookup. It does not filter the
            search itself (the ``q`` filter is disabled).
        region_code: Region code as a string, e.g. ``"US"`` (with quotes
            when written literally in the script).
        comments: The comments that will be posted; one video is requested
            per comment, capped at the API maximum of 50.

    Returns:
        A list of video id strings; empty when ``comments`` is empty.
    """
    # Without comments there is nothing to post, and maxResults=0 would make
    # the polling loop below spin forever.
    if not comments:
        return []

    # Fucntion from Library
    from apiclient.discovery import build
    import datetime
    import time

    def get_category_id(youtube, cat):
        """Return the id of the first category listed for the region.

        ``cat`` is accepted for the caller's convenience but is not used to
        pick the category. Returns None when no category carries an id.
        """
        req = youtube.videoCategories().list(part='snippet', regionCode=region_code)
        response = req.execute()
        for item in response["items"]:
            video_category_id = item.get("id")
            if video_category_id is not None:
                return video_category_id
        return None

    def search(youtube, video_category_id=None):
        """Build the search request; returns ``(request, category_id)``."""
        if video_category_id is None:
            video_category_id = get_category_id(youtube, keyword)
        published_after = (datetime.datetime.utcnow() +
                           datetime.timedelta(minutes=-1)).isoformat('T') + 'Z'
        req = youtube.search().list(
            videoCategoryId=video_category_id,
            # The region was previously only applied to the category lookup,
            # so the search itself returned videos from every region.
            # regionCode restricts by country, not by language; the API's
            # relevanceLanguage parameter is the one that biases language.
            regionCode=region_code,
            order='date',
            # search.list accepts at most 50 results per request.
            maxResults=min(len(comments), 50),  # q=keyword,
            publishedAfter=published_after,
            part='snippet',
            type='video',
        )
        return req, video_category_id

    api_key = API_KEY
    youtube = build('youtube', 'v3', developerKey=api_key)
    req, video_category_id = search(youtube)
    res = req.execute()
    while not res['items']:
        time.sleep(10)
        req, _ = search(youtube, video_category_id)  # re-use category id if already found to prevent lag
        res = req.execute()
    return [item['id']['videoId'] for item in res['items']]
(It asks what region to use btw)
Related
I am pulling video stats from YouTube and would like to automate the pull request. At the moment the code gets all the videos into a list from the API, but when I run it again it loads the stats into a list from scratch. Can I have it so that it only adds the new video stats to the list? No rows should be added if no new video was uploaded.
Need to replace api key and channel id for code to work
file1
from UCODE import Channel_Stats
# YouTube Data API key; replace "<API>" with a real key before running.
API_KEY = "<API>"
# Channel ids to pull statistics for.
# NOTE(review): this is a list, while get_stats() forwards its argument
# unchanged to Channel_Stats — presumably get_stats() is meant to be called
# once per id; confirm against the caller.
channel_id = ["UCLuR42wJEtpX5I-FaTasdcvViA","UCLuR42wJEtpX5IasdFaTasdcvViA"]
def get_stats(channel_id):
    """Collect the statistics for ``channel_id``.

    Builds a ``Channel_Stats`` object with the module-level ``API_KEY``, runs
    its full extraction and returns whatever ``save_and_return()`` produces.
    """
    stats = Channel_Stats(API_KEY, channel_id)
    stats.extract_all()
    return stats.save_and_return()
file2
import json
import requests
from tqdm import tqdm
import pandas as pd
from youtube_transcript_api import YouTubeTranscriptApi
class Channel_Stats:
    """Collect statistics for one YouTube channel through the Data API v3.

    Typical use: construct with an API key and a channel id, call
    ``extract_all()`` and then ``save_and_return()`` to get the per-video
    data as a pandas DataFrame.
    """

    def __init__(self, api_key, channel_id):
        """Store the credentials; no request is made here."""
        self.api_key = api_key
        self.channel_id = channel_id
        # Both stay None until the matching get_* method has run.
        self.channel_statistics = None
        self.video_data = None

    def extract_all(self):
        """Fetch the channel statistics and then the data of its videos."""
        self.get_channel_statistics()
        self.get_channel_video_data()

    def get_channel_statistics(self):
        """Extract the channel statistics.

        Returns:
            The ``statistics`` dict of the channel, or an empty dict when the
            response does not contain one. The result is also stored in
            ``self.channel_statistics``.
        """
        print('get channel statistics...')
        url = f'https://www.googleapis.com/youtube/v3/channels?part=statistics&id={self.channel_id}&key={self.api_key}'
        pbar = tqdm(total=1)
        json_url = requests.get(url)
        data = json.loads(json_url.text)
        try:
            data = data['items'][0]['statistics']
        except (KeyError, IndexError):
            # KeyError: the payload has no 'items'/'statistics';
            # IndexError: 'items' is present but empty.
            print('Could not get channel statistics')
            data = {}
        self.channel_statistics = data
        pbar.update()
        pbar.close()
        return data

    def get_channel_video_data(self):
        """Extract all video information of the channel.

        Returns:
            A dict keyed by video id. Each value starts with ``publishedAt``
            and ``title`` and is then extended with the contents of every
            part in ``parts``. Also stored in ``self.video_data``.
        """
        print('get video data...')
        channel_videos, _ = self._get_channel_content(limit=50)
        parts = ["snippet", "statistics", "contentDetails", "topicDetails"]
        for video_id in tqdm(channel_videos):
            for part in parts:
                data = self._get_single_video_data(video_id, part)
                channel_videos[video_id].update(data)
        self.video_data = channel_videos
        return channel_videos

    def _get_single_video_data(self, video_id, part):
        """
        Extract further information for a single video
        parts can be: 'snippet', 'statistics', 'contentDetails', 'topicDetails'

        Returns the requested part as a dict, or an empty dict (after
        printing the response) when the part is missing.
        """
        url = f"https://www.googleapis.com/youtube/v3/videos?part={part}&id={video_id}&key={self.api_key}"
        json_url = requests.get(url)
        data = json.loads(json_url.text)
        try:
            data = data['items'][0][part]
        except (KeyError, IndexError):
            # IndexError covers a response whose 'items' list is empty.
            print(f'Error! Could not get {part} part of data: \n{data}')
            data = dict()
        return data

    def _get_channel_content(self, limit=None, check_all_pages=True):
        """
        Extract all videos and playlists, can check all available search pages
        channel_videos = videoId: title, publishedAt
        channel_playlists = playlistId: title, publishedAt
        return channel_videos, channel_playlists

        At most 10 follow-up pages are requested after the first one.
        """
        url = f"https://www.googleapis.com/youtube/v3/search?key={self.api_key}&channelId={self.channel_id}&part=snippet,id&order=date"
        if limit is not None and isinstance(limit, int):
            url += "&maxResults=" + str(limit)
        vid, pl, npt = self._get_channel_content_per_page(url)
        idx = 0
        while check_all_pages and npt is not None and idx < 10:
            nexturl = url + "&pageToken=" + npt
            next_vid, next_pl, npt = self._get_channel_content_per_page(nexturl)
            vid.update(next_vid)
            pl.update(next_pl)
            idx += 1
        return vid, pl

    def _get_channel_content_per_page(self, url):
        """
        Extract all videos and playlists per page
        return channel_videos, channel_playlists, nextPageToken
        """
        json_url = requests.get(url)
        data = json.loads(json_url.text)
        channel_videos = dict()
        channel_playlists = dict()
        if 'items' not in data:
            print('Error! Could not get correct channel data!\n', data)
            # Return two distinct dicts: handing back channel_videos twice
            # made the caller's video and playlist dicts the same object.
            return channel_videos, channel_playlists, None
        nextPageToken = data.get("nextPageToken", None)
        for item in data['items']:
            try:
                kind = item['id']['kind']
                published_at = item['snippet']['publishedAt']
                title = item['snippet']['title']
                if kind == 'youtube#video':
                    video_id = item['id']['videoId']
                    channel_videos[video_id] = {'publishedAt': published_at, 'title': title}
                elif kind == 'youtube#playlist':
                    playlist_id = item['id']['playlistId']
                    channel_playlists[playlist_id] = {'publishedAt': published_at, 'title': title}
            except KeyError:
                print('Error! Could not extract data from item:\n', item)
        return channel_videos, channel_playlists, nextPageToken

    def save_and_return(self):
        """Return the collected video data as a DataFrame, one row per video.

        Each entry of ``self.video_data`` gains a ``videoId`` key. In the
        returned frame the ``thumbnails`` column holds the default thumbnail
        URL (None when a video has none) and a ``transcript`` column holds
        the transcript fetched for the video (None when it cannot be
        fetched).

        Returns:
            The DataFrame, or None (after printing a hint) when the channel
            statistics or the video data have not been collected yet.
        """
        if self.channel_statistics is None or self.video_data is None:
            print('data is missing!\nCall get_channel_statistics() and get_channel_video_data() first!')
            return None
        rows = []
        for video_id, info in self.video_data.items():
            info['videoId'] = video_id
            rows.append(info)
        data = pd.DataFrame(rows)
        # Read the URL from the source dicts so a video without thumbnail
        # data yields None instead of aborting the whole export.
        data['thumbnails'] = [
            ((row.get('thumbnails') or {}).get('default') or {}).get('url')
            for row in rows
        ]
        transcript = []
        for row in rows:
            try:
                transcript.append(YouTubeTranscriptApi.get_transcript(row['videoId']))
            except Exception:
                # Best effort: a video whose transcript cannot be fetched
                # simply gets None.
                transcript.append(None)
        data['transcript'] = transcript
        # self.video_data is deliberately left intact; an earlier popitem()
        # here silently dropped one video on every call.
        return data
The code below is what I use to upload a profile picture but I get the following error:
def get_image_bytes():
    """Return the size in bytes of ``./image/example.jpg``."""
    return os.path.getsize('./image/example.jpg')
def get_image_raw():
    """Return the raw bytes of ``./image/example.jpg``."""
    with open("./image/example.jpg", "rb") as image:
        return image.read()
def get_media_id():
    """Start a media upload (INIT command) and return its media id string.

    The image size announced in ``total_bytes`` comes from
    ``get_image_bytes()``.
    """
    total_bytes = get_image_bytes()
    init_url = f'https://upload.twitter.com/i/media/upload.json?command=INIT&total_bytes={total_bytes}&media_type=image/jpeg'
    response = httpx.post(init_url, headers=general_headers)
    return response.json()['media_id_string']
def append_image():
    """Upload the image bytes for a freshly initialised media upload.

    Runs the INIT step through ``get_media_id()`` and then sends the whole
    file as segment 0 with the APPEND command.

    Returns:
        The media id string of the upload, ready to be finalised.

    Raises:
        httpx.HTTPStatusError: If the APPEND request is rejected.
    """
    media_id = get_media_id()
    # The image has to travel in the request body as a multipart/form-data
    # part named "media". Interpolating the bytes into the query string sent
    # the text of their Python repr (b'\xff\xd8...') instead, so the received
    # segment never matched the total_bytes announced by INIT.
    # NOTE(review): webkit_headers is defined outside this block; if it pins a
    # Content-Type with its own multipart boundary, remove that entry so httpx
    # can set the boundary it actually uses.
    resp = httpx.post(
        'https://upload.twitter.com/i/media/upload.json',
        params={'command': 'APPEND', 'media_id': media_id, 'segment_index': 0},
        files={'media': get_image_raw()},
        headers=webkit_headers,
    )
    # Fail here rather than at FINALIZE when the segment is rejected.
    resp.raise_for_status()
    return media_id
def update_profile():
    """Upload the image, finalise the upload and print the API's JSON reply."""
    uploaded_id = append_image()
    finalize_url = f'https://upload.twitter.com/i/media/upload.json?command=FINALIZE&media_id={uploaded_id}'
    reply = httpx.post(finalize_url, headers=general_headers)
    print(reply.json())
# Script entry point: runs the INIT/APPEND/FINALIZE sequence as soon as the file is executed.
update_profile()
Error:
{'request': '/i/media/upload.json', 'error': 'Segments do not add up to provided total file size.'}
I don't know how twitter wants to receive the image binary.
I've heard word that they expect chunks but I've tried almost everything.
When uploading a new profile picture I do see that they crop the image; maybe this is the reason, but I have had no success so far.
My image is under 5mb by the way.
So I have been using youtube api to scrape a channel. Everything was working fine until 3 days ago (03/15/2019) when the result isn't sorted anymore. It seems that no matter what I put in the order parameter, the results are all the same. Can anyone tell me why it isn't working? Here's the code snippet:
import re
import os
import json
import MySQLdb
from pytube import YouTube
import urllib
import isodate
import sys
def get_all_video_in_channel(channel_id):
    """Return details for the most recent videos of a channel.

    Pages through the search endpoint (ordered by date, 50 results per page)
    and, for every page, fetches ``snippet`` and ``contentDetails`` of the
    videos found. Paging stops once at least 50 videos have been collected
    or the response carries no ``nextPageToken``.

    Args:
        channel_id: Id of the YouTube channel to list.

    Returns:
        A list of video resource dicts as returned by the videos endpoint.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    from contextlib import closing

    api_key = '<MY KEY>'
    video_url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails&id={}&key={}'
    first_url = 'https://www.googleapis.com/youtube/v3/search?key={}&channelId={}&part=snippet,id&order=date&maxResults=50'.format(api_key, channel_id) #order by date but won't work
    res = []
    url = first_url
    while True:
        # closing() releases the connection on both Python versions.
        with closing(urlopen(url)) as inp:
            resp = json.load(inp)
        vid_ids = [
            jobject['id']['videoId']
            for jobject in resp['items']
            if jobject['id']['kind'] == "youtube#video"
        ]
        # A page may contain only playlists/channels; skip the details
        # request instead of sending an empty id list.
        if vid_ids:
            with closing(urlopen(video_url.format(",".join(vid_ids), api_key))) as vidreq:
                vidres = json.load(vidreq)
            res.extend(vidres['items'])
        if len(res) >= 50:
            break
        # A missing token means this was the last page.
        next_page_token = resp.get('nextPageToken')
        if next_page_token is None:
            break
        url = first_url + '&pageToken={}'.format(next_page_token)
    return res
# Example run: fetch the videos of one channel as soon as the file is executed.
c_id = 'UCycyxZMoPwg9cuRDMyQE7PQ'
episodes = get_all_video_in_channel(c_id)
Edit: I did some more research and people say that the API indeed is not working properly due to Youtube doing something with deleting the New Zealand shooting video and it will soon be working properly again.
I recommend you to see the answer https://stackoverflow.com/a/55220182/8327971. This is a known and acknowledged issue by Google: https://issuetracker.google.com/issues/128673552.
So I'm following a tutorial for a certain Reddit-to-Twitter bot that's coded in Python using PRAW, and I am stuck hitting errors.
Running this code in the command console gives me an error on line 74:
import praw
import json
import requests
import tweepy
import time
# Twitter API credentials read by tweeter(); replace the placeholders with real values.
access_token = 'secret'
# NOTE(review): this value starts with a space — confirm that is intended.
access_token_secret = ' secret'
consumer_key = 'secret'
consumer_secret = 'secret'
def strip_title(title):
    """Shorten a post title so it leaves room for the link and hashtags.

    Titles shorter than 94 characters are returned unchanged; longer ones are
    cut to their first 93 characters and suffixed with ``"..."``.
    """
    if len(title) >= 94:
        return title[:93] + "..."
    return title
def tweet_creator(subreddit_info):
    """Collect the hot posts of a subreddit and shorten their links.

    Reads up to 20 hot submissions from ``subreddit_info``.

    Returns:
        A ``(mini_post_dict, post_ids)`` tuple: a dict mapping each title
        (after ``strip_title``) to the result of ``shorten`` for its URL, and
        the list of submission ids in the order they were read.
    """
    links_by_title = {}
    submission_ids = []
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print("[bot] Getting posts from Reddit")
    for submission in subreddit_info.get_hot(limit=20):
        links_by_title[strip_title(submission.title)] = submission.url
        submission_ids.append(submission.id)
    print("[bot] Generating short link using goo.gl")
    short_links = {}
    for title, long_link in links_by_title.items():
        short_links[title] = shorten(long_link)
    return short_links, submission_ids
def setup_connection_reddit(subreddit):
    """Open a PRAW session and return the handle for ``subreddit``.

    The subreddit name is embedded in the user agent string passed to
    ``praw.Reddit``.
    """
    print("[bot] setting up connection with Reddit")
    user_agent = 'yasoob_python reddit twitter bot monitoring %s' % (subreddit)
    reddit = praw.Reddit(user_agent)
    return reddit.get_subreddit(subreddit)
def shorten(url):
    """Return a short link for ``url`` as a string.

    Posts the long URL to the Google URL Shortener endpoint and returns the
    ``id`` field of the JSON reply, which holds the short URL. When the reply
    is not JSON or carries no ``id`` (for example an error payload), the
    original long URL is returned so callers always receive a string.

    Args:
        url: The long URL to shorten.
    """
    headers = {'content-type': 'application/json'}
    payload = {"longUrl": url}
    # Kept in its own name so the long URL stays available as a fallback.
    endpoint = "https://www.googleapis.com/urlshortener/v1/url"
    r = requests.post(endpoint, data=json.dumps(payload), headers=headers)
    try:
        link = json.loads(r.text)
    except ValueError:
        return url
    # Returning the whole reply dict made the caller's string concatenation
    # fail with "coercing to Unicode: need string or buffer, dict found".
    if isinstance(link, dict) and 'id' in link:
        return link['id']
    return url
def duplicate_check(id):
    """Report whether a post id is already recorded in posted_posts.txt.

    Args:
        id: The post id to look for; it is compared as ``str(id)`` against
            each line of the file with surrounding whitespace removed.

    Returns:
        1 when the id is recorded, 0 otherwise. A missing file counts as
        "nothing posted yet" and yields 0.
    """
    import errno

    wanted = str(id)
    try:
        with open('posted_posts.txt', 'r') as handle:
            for line in handle:
                # Whole-line comparison: a substring test would report
                # "abc" as posted once "abcd" is in the file.
                if line.strip() == wanted:
                    return 1
    except IOError as err:
        # The file only exists after the first id has been written; any
        # other I/O problem is still raised.
        if err.errno != errno.ENOENT:
            raise
    return 0
def add_id_to_file(id):
    """Append ``id`` on a line of its own to posted_posts.txt."""
    record = str(id) + "\n"
    with open('posted_posts.txt', 'a') as handle:
        handle.write(record)
def main():
    """Read the hot posts of r/showerthoughts and hand them to tweeter()."""
    showerthoughts = setup_connection_reddit('showerthoughts')
    titles_to_links, ids = tweet_creator(showerthoughts)
    tweeter(titles_to_links, ids)
def tweeter(post_dict, post_ids):
    """Tweet every post whose id is not yet recorded as posted.

    Args:
        post_dict: Maps a post title to the link text appended to the tweet.
        post_ids: Post ids, paired positionally with the keys of
            ``post_dict``.

    After each tweet the id is recorded with ``add_id_to_file`` and the
    function sleeps for 30 seconds.
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    for post, post_id in zip(post_dict, post_ids):
        if duplicate_check(post_id) != 0:
            print("[bot] Already posted")
            continue
        print("[bot] Posting this link on twitter")
        status = post+" "+post_dict[post]+" #Python #reddit #bot"
        print(status)
        api.update_status(status)
        add_id_to_file(post_id)
        time.sleep(30)
# Run the bot only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
main()
Error :
print post+" "+post_dict[post]+"#python #reddit #bot"
TypeError: coercing to Unicode: need string or buffer, dict found
My understanding of the code and the error is that a string needs to be sent, but it is somehow getting an entire dictionary instead. I thought that by passing post as the key into post_dict it would get the specific post for the bot to utilize, but instead it's fetching a dictionary!
There are two lines, 74 and 75, that both use post_dict[post], and neither gets a plain string value when looking up the post key.
Try printing post and post_dict before you do that concatenation in the tweeter function's for loop. That should show you what those structures look like and make the solution evident.
I am using python-instagram API and I am displaying some images with searched tag!
Rather than displaying, I want to save those images so that it can be used for further analysis.
Is it possible? I am new to python and using API's.
Here is my code snippet which does this:
#route('/tag_search')
def tag_search(session):
    """Build an HTML fragment showing recent media for the "catband" tag.

    Returns 'Missing Access Token' when the session holds no access token.
    Any exception raised while talking to the API is printed, not raised.
    """
    # NOTE(review): the assembled content is not returned in the visible
    # snippet — presumably the original continues past this point; confirm.
    access_token = session.get('access_token')
    if not access_token:
        return 'Missing Access Token'
    content = "<h2>Tag Search</h2>"
    try:
        api = client.InstagramAPI(access_token=access_token)
        matches, next_tag = api.tag_search(q="catband")
        recent_media, next_page = api.tag_recent_media(tag_name=matches[0].name)
        images = [
            '<img src="%s"/>' % media.get_standard_resolution_url()
            for media in recent_media
        ]
        content += ''.join(images)
    except Exception as e:
        print(e)
Thanx in advance:)
After some help from comments, and other resources, I found out that since I have URL of the image, I can use it to download!
The library which is used was "urllib"
I used a counter variable to save images in the same directory where the file is and in the form of 1.jpg, 2.jpg and so on and so forth.
Here is the modified code:
#route('/tag_search')
def tag_search(session):
    """Show and download recent media for the "selfie" tag.

    Each image is saved in the current directory as 0.jpg, 1.jpg, ... in the
    order it is returned. Returns 'Missing Access Token' when the session
    holds no access token. Any exception raised along the way is printed,
    not raised.
    """
    # NOTE(review): the assembled content is not returned in the visible
    # snippet — presumably the original continues past this point; confirm.
    access_token = session.get('access_token')
    if not access_token:
        return 'Missing Access Token'
    content = "<h2>Tag Search</h2>"
    try:
        api = client.InstagramAPI(access_token=access_token)
        matches, next_tag = api.tag_search(q="selfie")
        recent_media, next_page = api.tag_recent_media(tag_name=matches[0].name)
        images = []
        for count, media in enumerate(recent_media):
            media_url = media.get_standard_resolution_url()
            images.append('<img src="%s"/>' % media_url)
            # repr(count) is what the Python 2 backtick syntax evaluates to.
            urllib.urlretrieve(media_url, repr(count) + ".jpg")
        content += ''.join(images)
    except Exception as e:
        print(e)
Hope this Helps:)