Fetching Comments using Youtube data API - python

I'm trying to fetch all the comments for a particular YouTube video.
I've written the following code:
# storing all the comments in a list (l)
def video_comments(url):
    # empty list for storing replies
    replies = []

    # creating youtube resource object
    youtube = build('youtube', 'v3', developerKey=api_key)

    # retrieve youtube video results
    video_response = youtube.commentThreads().list(
        part='snippet,replies',
        videoId=url
    ).execute()

    for item in video_response['items']:
        # extracting the top-level comment
        comment = item['snippet']['topLevelComment']['snippet']['textDisplay']

        # counting the number of replies to the comment
        replycount = item['snippet']['totalReplyCount']

        # if there are replies
        if replycount > 0:
            # iterate through all replies
            for reply in item['replies']['comments']:
                # extract the reply text
                reply = reply['snippet']['textDisplay']
                # store the reply in the list
                replies.append(reply)

        comment = remove_URL(comment)
        # store the comment in the list
        l.append(comment)

        # store the cleaned replies in the list
        for resp in replies:
            resp = remove_URL(resp)
            l.append(resp)

        # empty the reply list
        replies = []

video_comments(n)
However, this code fetches only 20-25 comments even though the video has hundreds or even thousands of comments.

The response has a nextPageToken attribute (see the documentation); you then have to pass that token back as the pageToken parameter of your next request in order to get the next page of results.
Try this example:
https://youtube.googleapis.com/youtube/v3/commentThreads?part=id%2Creplies%2Csnippet&maxResults=10&videoId=pf3kMUZvyE8&key=[YOUR_API_KEY]
Response (note the nextPageToken attribute):
{
"kind": "youtube#commentThreadListResponse",
"etag": "priyTHCuTXn9LlRkKazYailhGq0",
"nextPageToken": "QURTSl9pMlgzMi1IR0ZfTEtXZzNFRjQ1N3dEVmJlNXlPZ3BqUDFrMHlUejdxc3NIZFBOS013dWFRVjU5TWotWFJBaFJfUE1BSHR4aE9BQQ==",
"pageInfo": {
"totalResults": 9,
"resultsPerPage": 10
},
"items": [
{
"kind": "youtube#commentThread",
"etag": "MezAPCqHnXHD4xfxGWCKw8GwMrk",
"id": "Ugybh70lAXjKtWKnhVt4AaABAg",
"snippet": {
"videoId": "pf3kMUZvyE8",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "MfJ5ylnOGfVyfNlVM7qc0mSwLJQ",
"id": "Ugybh70lAXjKtWKnhVt4AaABAg",
"snippet": {
"videoId": "pf3kMUZvyE8",
"textDisplay": "Electricity is raw energy",
"textOriginal": "Electricity is raw energy",
"authorDisplayName": "Kevinzhw Zhang wang",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AKedOLSU9_Tg183EZXdMmQbFcYKBw4WBajjPZc4gpT1W=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCBCwvesq011-2OP1mXq6t8w",
"authorChannelId": {
"value": "UCBCwvesq011-2OP1mXq6t8w"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-12-24T05:59:47Z",
"updatedAt": "2021-12-24T05:59:47Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
},
{
"kind": "youtube#commentThread",
"etag": "fiwm5vdcDBQh_CtyzB05jqp3h68",
"id": "UgzoTdopkSulNGL_6tZ4AaABAg",
"snippet": {
"videoId": "pf3kMUZvyE8",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "pCGjZzOYwkp7Z4bbhF_DiutwSow",
"id": "UgzoTdopkSulNGL_6tZ4AaABAg",
"snippet": {
"videoId": "pf3kMUZvyE8",
"textDisplay": "Yo no tengo autismo y si intenté eso XD",
"textOriginal": "Yo no tengo autismo y si intenté eso XD",
"authorDisplayName": "XXX DDD",
"authorProfileImageUrl": "https://yt3.ggpht.com/ytc/AKedOLTiD1hjwHmK8TWDil3XujkWfIFMvrc-_y0cTg=s48-c-k-c0x00ffffff-no-rj",
"authorChannelUrl": "http://www.youtube.com/channel/UCXarJ5GGpaBLaV1KEPimQXA",
"authorChannelId": {
"value": "UCXarJ5GGpaBLaV1KEPimQXA"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2021-12-24T00:45:31Z",
"updatedAt": "2021-12-24T00:45:31Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
},
[other comments here...]
]
}
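In Python, with the same google-api-python-client the question already uses, the paging loop could look roughly like this. This is only a sketch: it assumes api_key, remove_URL, and the list l from the question are already defined.
def video_comments(video_id):
    youtube = build('youtube', 'v3', developerKey=api_key)

    params = dict(part='snippet,replies', videoId=video_id, maxResults=100)
    while True:
        response = youtube.commentThreads().list(**params).execute()

        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
            l.append(remove_URL(comment))

            # replies bundled into the thread, if any
            for reply in item.get('replies', {}).get('comments', []):
                l.append(remove_URL(reply['snippet']['textDisplay']))

        # keep requesting pages until nextPageToken disappears
        token = response.get('nextPageToken')
        if not token:
            break
        params['pageToken'] = token
Also note that the replies embedded in each commentThread may not be the complete set; for threads with many replies you can page through them with comments().list and the parentId parameter.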

Related

How to get the view count for a specific time frame in a YouTube video?

I'm using the YouTube Data API v3 to fetch the videos a user has uploaded to their channel.
I need the video statistics (mainly the view count) for specific time frames within a video, for example from the 10th second to the 15th second, or maybe just at the 10th second.
How can this be achieved? I have gone through the API docs but don't see any parameter or section that provides that data.
Code snippet to fetch the data of completed livestreams / liveBroadcasts:
request = youtube.liveBroadcasts().list(
    part="snippet, status",
    broadcastStatus="completed",
    broadcastType="all"
)
response = request.execute()
Then, for each video in the returned list, I'm running the following call to fetch the video-related data:
request = youtube.videos().list(
    part="snippet, contentDetails, statistics, liveStreamingDetails, status",
    id=videoID
)
response = request.execute()
return response
Sample Response
{
"kind": "youtube#videoListResponse",
"etag": "JI93kvK9Gsz3h2R_OvkmgPkUrqs",
"items": [
{
"kind": "youtube#video",
"etag": "NfG5gw0fBnjykGf8db9FiaBXw3M",
"id": "FNS_JCM-H3U",
"snippet": {
"publishedAt": "2022-02-27T19:20:15Z",
"channelId": "UCVxNxW5_GhdS_o8m4PyHsmg",
"title": "Time Check Test",
"description": "",
"thumbnails": {
"default": {
"url": "https://i.ytimg.com/vi/FNS_JCM-H3U/default_live.jpg",
"width": 120,
"height": 90
},
"medium": {
"url": "https://i.ytimg.com/vi/FNS_JCM-H3U/mqdefault_live.jpg",
"width": 320,
"height": 180
},
"high": {
"url": "https://i.ytimg.com/vi/FNS_JCM-H3U/hqdefault_live.jpg",
"width": 480,
"height": 360
},
"standard": {
"url": "https://i.ytimg.com/vi/FNS_JCM-H3U/sddefault_live.jpg",
"width": 640,
"height": 480
},
"maxres": {
"url": "https://i.ytimg.com/vi/FNS_JCM-H3U/maxresdefault_live.jpg",
"width": 1280,
"height": 720
}
},
"channelTitle": "Streampala",
"categoryId": "20",
"liveBroadcastContent": "none",
"localized": {
"title": "Time Check Test",
"description": ""
},
"defaultAudioLanguage": "en-US"
},
"contentDetails": {
"duration": "PT1M20S",
"dimension": "2d",
"definition": "sd",
"caption": "false",
"licensedContent": False,
"contentRating": {},
"projection": "rectangular",
"hasCustomThumbnail": False
},
"status": {
"uploadStatus": "uploaded",
"privacyStatus": "public",
"license": "youtube",
"embeddable": False,
"publicStatsViewable": True,
"madeForKids": False,
"selfDeclaredMadeForKids": False
},
"statistics": {
"viewCount": "4",
"likeCount": "0",
"dislikeCount": "0",
"favoriteCount": "0",
"commentCount": "0"
},
"liveStreamingDetails": {
"actualStartTime": "2022-02-27T19:20:33Z",
"actualEndTime": "2022-02-27T19:21:53Z",
"scheduledStartTime": "2022-02-27T19:20:12Z"
}
}
],
"pageInfo": {
"totalResults": 1,
"resultsPerPage": 1
}
}
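For reference, the two calls above chained together might look roughly like the sketch below. It assumes youtube is the OAuth-authorized client object already built for this question (liveBroadcasts().list requires authorization, not just an API key); note that statistics.viewCount is a lifetime total for the whole video, not a per-time-range figure.
broadcasts = youtube.liveBroadcasts().list(
    part="snippet,status",
    broadcastStatus="completed",
    broadcastType="all"
).execute()

for broadcast in broadcasts.get("items", []):
    # for live broadcasts, the broadcast id is also the video id
    video_id = broadcast["id"]

    video = youtube.videos().list(
        part="snippet,contentDetails,statistics,liveStreamingDetails,status",
        id=video_id
    ).execute()

    stats = video["items"][0]["statistics"]
    # viewCount is cumulative for the whole video
    print(broadcast["snippet"]["title"], stats["viewCount"])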

Response Payload incorrect using Youtube Data API

I'm trying to use the YouTube Data API to get some info on various channels, but when I run my build:
youtube = build('youtube', 'v3', developerKey=api_key)
request = youtube.channels().list(
    part="statistics",
    forUsername="tonetalks"
)
response = request.execute()
print(response)
It returns this:
{'kind': 'youtube#channelListResponse', 'etag': 'RuuXzTIr0OoDqI4S0RU6n4FqKEM', 'pageInfo': {'totalResults': 0, 'resultsPerPage': 5}}
Any thoughts on how to fix this?
The forUsername value is incorrect. If you're looking for the "tonetalks" channel, you'll have to use its channel id, UCfP8rCe_fAITriqI3UPYF0Q (the id that appears in the channel's URL):
request = youtube.channels().list(
    part="statistics",
    id="UCfP8rCe_fAITriqI3UPYF0Q"
)
{
"kind": "youtube#channelListResponse",
"etag": "5gs56_i4Xd_fQ4A1OkQEnWWnX7A",
"pageInfo": {
"totalResults": 1,
"resultsPerPage": 5
},
"items": [
{
"kind": "youtube#channel",
"etag": "m7gogJwH4TshrBX4PCiuFP5MsJI",
"id": "UCfP8rCe_fAITriqI3UPYF0Q",
"statistics": {
"viewCount": "6033544",
"subscriberCount": "81200",
"hiddenSubscriberCount": false,
"videoCount": "229"
}
}
]
}
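For completeness, a minimal runnable sketch of that corrected call (assuming api_key holds a valid API key):
from googleapiclient.discovery import build

youtube = build('youtube', 'v3', developerKey=api_key)

response = youtube.channels().list(
    part='statistics',
    id='UCfP8rCe_fAITriqI3UPYF0Q'
).execute()

# totalResults is now 1 and the counts live under items[0]['statistics']
print(response['items'][0]['statistics'])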

key error while parsing python dictionary

{
"kind": "youtube#commentThreadListResponse",
"etag": "5b1YCNidguUpH4QsR6mpPJrL6es",
"nextPageToken": "QURTSl9pMTQwTEZFU1VRZTB1R2toTFh5djJJSWQzM1oyOXp4Z3ppSXZSNEtNQ25RRzQyRm1xXzFwMDZvc3dqb1g5dnQyTnVUMVJld2lWVXFta2tFclh2LWk3eENwOFFxMmluTGhlY3JXOHNsSnh4ZlFyNllfdWVWMVlPdkhiWWlnVzA=",
"pageInfo": {
"totalResults": 100,
"resultsPerPage": 100
},
"items": [
{
"kind": "youtube#commentThread",
"etag": "GQifP0HFLluusa1n0pFQCxggSvI",
"id": "UgxWDLFO6d6fhe4UaJd4AaABAg",
"snippet": {
"videoId": "BEWz4SXfyCQ",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "YlbdyUbeN1LqFBOqDnQnQZU2DnQ",
"id": "UgxWDLFO6d6fhe4UaJd4AaABAg",
"snippet": {
"videoId": "BEWz4SXfyCQ",
"textDisplay": "Honestly Jeremy is just an annoying piggyback rider",
"textOriginal": "Honestly Jeremy is just an annoying piggyback rider",
"authorDisplayName": "Michael Myers",
"authorProfileImageUrl": "https://yt3.ggpht.com/a/AATXAJwHIfrPXguIZR7YggVntreixLfBisGtlo5xTg=s48-c-k-c0xffffffff-no-rj-mo",
"authorChannelUrl": "http://www.youtube.com/channel/UCs4do_iNqxBcxxPmv6U1VPg",
"authorChannelId": {
"value": "UCs4do_iNqxBcxxPmv6U1VPg"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2020-07-08T20:55:48Z",
"updatedAt": "2020-07-08T20:55:48Z"
}
},
"canReply": true,
"totalReplyCount": 0,
"isPublic": true
}
},
{
"kind": "youtube#commentThread",
"etag": "wFEgumlYzFR2ZLOsHgEdQoV45SI",
"id": "UgxaQ38-nL84EgK9ABh4AaABAg",
"snippet": {
"videoId": "BEWz4SXfyCQ",
"topLevelComment": {
"kind": "youtube#comment",
"etag": "KyMK87Zq9ej2AHtl44x5-ykwnzQ",
"id": "UgxaQ38-nL84EgK9ABh4AaABAg",
"snippet": {
"videoId": "BEWz4SXfyCQ",
"textDisplay": "Bring bob back and leave captain graybeard at the damn house",
"textOriginal": "Bring bob back and leave captain graybeard at the damn house",
"authorDisplayName": "Brad Johnson",
"authorProfileImageUrl": "https://yt3.ggpht.com/a/AATXAJzzXxTu9bz5hzGL20X1w3ALIcqIWBCc4uzuQPS8=s48-c-k-c0xffffffff-no-rj-mo",
"authorChannelUrl": "http://www.youtube.com/channel/UCwTUCnELUJ3IwcBsEwqjNaQ",
"authorChannelId": {
"value": "UCwTUCnELUJ3IwcBsEwqjNaQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 1,
"publishedAt": "2020-07-08T18:37:35Z",
"updatedAt": "2020-07-08T18:37:35Z"
}
},
"canReply": true,
"totalReplyCount": 1,
"isPublic": true
},
"replies": {
"comments": [
{
"kind": "youtube#comment",
"etag": "eEq9MZRmGGq3sX4IpzEHk_pYvTw",
"id": "UgxaQ38-nL84EgK9ABh4AaABAg.9ArZ6N2FniS9ArdOylLUcm",
"snippet": {
"videoId": "BEWz4SXfyCQ",
"textDisplay": "No, because then there'd be no one to distract you from what a fraud Lazar is.",
"textOriginal": "No, because then there'd be no one to distract you from what a fraud Lazar is.",
"parentId": "UgxaQ38-nL84EgK9ABh4AaABAg",
"authorDisplayName": "Rombert Dillahuntsvalle",
"authorProfileImageUrl": "https://yt3.ggpht.com/a/AATXAJwALDysFZlmZoXLVeqzSZc6HcvUetsOCk6a2vTY=s48-c-k-c0xffffffff-no-rj-mo",
"authorChannelUrl": "http://www.youtube.com/channel/UCpdQrMvl72DIMs1vpsKvpgQ",
"authorChannelId": {
"value": "UCpdQrMvl72DIMs1vpsKvpgQ"
},
"canRate": true,
"viewerRating": "none",
"likeCount": 0,
"publishedAt": "2020-07-08T19:23:49Z",
"updatedAt": "2020-07-08T19:23:49Z"
}
}
]
}
},
The code is:
for i in data['items']:
    print(i['replies']['comments'][0]['snippet']['textOriginal'])
My apologies for the terrible formatting, but I couldn't get all of it to fit in the code block.
I am trying to retrieve the nested "replies" then "comments". I have searched extensively through similar posts, and am still stuck.
I keep getting a key error for 'replies'.
Any help would be much appreciated, thanks.
You need to either check whether the key exists or use a try/except block:
for i in dct['items']:
    try:
        print(i['replies']['comments'][0]['snippet']['textOriginal'])
    except KeyError:
        pass
This yields for your given input:
No, because then there'd be no one to distract you from what a fraud Lazar is.
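If you want every reply rather than only the first one, a dict.get() variant keeps the loop silent for threads that have no replies key at all. A sketch, using the data dictionary from the question:
for i in data['items']:
    for reply in i.get('replies', {}).get('comments', []):
        print(reply['snippet']['textOriginal'])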

Need help formatting/parsing this list

I'm not a developer, so sorry if this is a dumb question or if my terminology is incorrect. I'm writing a script to make calls to our CMDB's API, but I'm not sure how to handle the data that is being sent back from it. It appears to be a list type, but I can't reference anything by key names. Is there a way to convert it to something that I can easily manipulate and pull data out of?
Here is my code:
import requests
import json
r=requests.post('API.URL', data={'grant_type': 'password', 'client_id':'#######', 'username': 'user', 'password': 'password'})
json_data = json.loads(r.content)
token = json_data['access_token']
data = {
    "filters": [
        {
            "fieldId": "937905400191ae67dd03ab4b79968fcbaa264b1a75",
            "operator": "eq",
            "value": "hostname"
        }
    ],
    "fields": [
        '9426b6ddf3cb971488517145e39efc5aa7f16fec46',
        '9343f8800b3917f26533954918a6388ae8c863507f',
        '9379053db492ece14816704ef5a9e3e567e217511b',
        '9343f93fc4c8422bcf24e74a9a86035bb7d0248b00',
        '941ba290776d6f51ce35664246927b958330a753b2'
    ],
    "association": "Configuration Item",
    "busObId": "93dada9f640056ce1dc67b4d4bb801f69104894dc8",
    "includeAllFields": 'false',
    "pageNumber": 0,
    "pageSize": 300,
    "scope": "Global",
    "scopeOwner": "(None)",
    "searchName": "APItest"
}
payload = json.dumps(data)
headers = {'Content-Type': 'application/json', 'Accept': 'application/json', 'Authorization':'bearer '+token}
search=requests.post('http://API.URL', headers=headers, data=payload)
search_json = json.loads(search.content)
bo = search_json['businessObjects']
print(bo)
Here's the response:
[
{
"busObRecId": "9423ad7d617390fdc956ee4302a69d0ccf1a37a4c1",
"hasError": false,
"links": [
{
"url": "http://URL",
"name": "Delete Record"
}
],
"fields": [
{
"displayName": "Business Sponsor",
"name": "Business Sponsor",
"value": "",
"html": null,
"dirty": false,
"fieldId": "9426b6ddf3cb971488517145e39efc5aa7f16fec46"
},
{
"displayName": "Owned By",
"name": "Owned By",
"value": "John Doe",
"html": null,
"dirty": false,
"fieldId": "9343f8800b3917f26533954918a6388ae8c863507f"
},
{
"displayName": "Asset Status",
"name": "Asset Status",
"value": "Active",
"html": null,
"dirty": false,
"fieldId": "9379053db492ece14816704ef5a9e3e567e217511b"
},
{
"displayName": "Description",
"name": "Description",
"value": "Automation Server",
"html": null,
"dirty": false,
"fieldId": "9343f93fc4c8422bcf24e74a9a86035bb7d0248b00"
},
{
"displayName": "Data Center Location",
"name": "Data Center Location",
"value": "",
"html": null,
"dirty": false,
"fieldId": "941ba290776d6f51ce35664246927b958330a753b2"
}
],
"errorMessage": null,
"busObPublicId": "9423ad7d617390fdc956ee4302a69d0ccf1a37a4c1",
"busObId": "93dada9f640056ce1dc67b4d4bb801f69104894dc8",
"errorCode": null
}
]
type() shows that the object bo is a list and len() says it only has one element, so I'm not sure how to pull data out of it without hacking away at it and stripping out characters.
The reason you cannot reference anything by key names is that your output is a list, a list containing only a single dictionary element. If you print out
bo[0]
you get that dictionary on its own, without the [ and ] symbols. On the dictionary we can then access elements by key, e.g.:
print(bo[0]["busObId"])
will return the following value:
93dada9f640056ce1dc67b4d4bb801f69104894dc8
Say you want to print out the fieldId of the first element of "fields". You can do it the following way:
print(bo[0]["fields"][0]["fieldId"])
Hope this helped.

A json response into a utilized-dict

So I'm trying to parse the following JSON response; here's a sample:
{
"kind": "youtube#searchListResponse",
"etag": "\"m2yskBQFythfE4irbTIeOgYYfBU/fywkWrox-IkW0v2IWY27RMiWvvA\"",
"nextPageToken": "CBQQAA",
"regionCode": "IQ",
"pageInfo": {
"totalResults": 1000000,
"resultsPerPage": 20
},
"items": [
{
"kind": "youtube#searchResult",
"etag": "\"m2yskBQFythfE4irbTIeOgYYfBU/j0uEstXCXOhrDqDegEBmEeHqsBM\"",
"id": {
"kind": "youtube#video",
"videoId": "YQHsXMglC9A"
},
"snippet": {
"publishedAt": "2015-10-23T06:54:18.000Z",
"channelId": "UComP_epzeKzvBX156r6pm1Q",
"title": "Adele - Hello",
"description": "'Hello' is taken from the new album, 25, out November 20. http://adele.com Available now from iTunes http://smarturl.it/itunes25 Available now from Amazon ...",
"thumbnails": {
"default": {
"url": "https://i.ytimg.com/vi/YQHsXMglC9A/default.jpg",
"width": 120,
"height": 90
},
"medium": {
"url": "https://i.ytimg.com/vi/YQHsXMglC9A/mqdefault.jpg",
"width": 320,
"height": 180
},
"high": {
"url": "https://i.ytimg.com/vi/YQHsXMglC9A/hqdefault.jpg",
"width": 480,
"height": 360
}
},
"channelTitle": "AdeleVEVO",
"liveBroadcastContent": "none"
}
}
And this is my parsing function:
def parse(self):
    items = self['items']
    i = 0
    for item in items:
        Data = {str(i): {
            "id": item['id']['videoId'],
            "title": item['snippet']['title'],
            "description": item['snippet']['description'],
            "thumbnail": item['snippet']['thumbnails']['medium']['url'],
            "publishedAt": item['snippet']['publishedAt'],
            "FullURL": "https://www.youtube.com/watch?v=" + item['id']['videoId']
        }}
        i = i + 1
    return Data
The main problem is that the dictionary only ever contains the last bit of the response. For example, I'm fetching 10 results and it only returns the last one. What's the problem?
Just take the definition of Data out of the for loop, initialise it as an empty dictionary, and then add key/value pairs to it on each iteration. Currently you redefine the entire dictionary, containing a single entry, on every pass through the loop, and you end up returning only the final version.
def parse(self):
    items = self['items']
    Data = {}  # Initialise it here
    for i, item in enumerate(items):  # Now you don't need to increment i
        # Insert your key/value pair
        Data[str(i)] = {
            "id": item['id']['videoId'],
            "title": item['snippet']['title'],
            "description": item['snippet']['description'],
            "thumbnail": item['snippet']['thumbnails']['medium']['url'],
            "publishedAt": item['snippet']['publishedAt'],
            "FullURL": "https://www.youtube.com/watch?v=" + item['id']['videoId']
        }
    return Data
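For what it's worth, the same reshaping can be written as a dict comprehension. This is a sketch that assumes every item is a video result with the same shape as the sample above (channel or playlist results would have no id['videoId']):
def parse(self):
    return {
        str(i): {
            "id": item['id']['videoId'],
            "title": item['snippet']['title'],
            "description": item['snippet']['description'],
            "thumbnail": item['snippet']['thumbnails']['medium']['url'],
            "publishedAt": item['snippet']['publishedAt'],
            "FullURL": "https://www.youtube.com/watch?v=" + item['id']['videoId']
        }
        for i, item in enumerate(self['items'])
    }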
