As confirmed with YouTube Support, the way the YouTube API is set up, you unfortunately can't pull both Country and Device Type as dimensions at the same time; the only workaround is to pull a Device Type report and add one country at a time as a filter.
Therefore, you need to fire one API call per country, which can be accomplished with a loop that iterates the query over every possible country.
I managed to script the code for the API call itself, but I need help with the loop over all possible countries (whether that's through an API call that returns the full country list or simply by referencing a CSV file containing it).
FYI, it is not possible to use device type as a filter for a country report.
Can anyone give me a hand with that? My Python code is below.
https://1drv.ms/u/s!AlgTM2giFod43mzV1dQARcvsB81o
I was able to answer my own question; this is the revised code, in case anyone is interested:
https://1drv.ms/u/s!AlgTM2giFod43m3plTrfSRHOfaCz
#!/usr/bin/python
from datetime import datetime, timedelta
import httplib2
import os
import sys
import csv
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
import argparse
from oauth2client.tools import argparser, run_flow
# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the Google Developers Console at
# https://console.developers.google.com/.
# Please ensure that you have enabled the YouTube Data and YouTube Analytics
# APIs for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secretXYZ"
# These OAuth 2.0 access scopes allow for read-only access to the authenticated
# user's account for both YouTube Data API resources and YouTube Analytics Data.
YOUTUBE_SCOPES = ["https://www.googleapis.com/auth/youtube.readonly",
"https://www.googleapis.com/auth/yt-analytics.readonly"]
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
YOUTUBE_ANALYTICS_API_SERVICE_NAME = "youtubeAnalytics"
YOUTUBE_ANALYTICS_API_VERSION = "v1"
# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the Developers Console
https://console.developers.google.com/
For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
CLIENT_SECRETS_FILE))
def get_authenticated_services(args):
    flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE,
                                   scope=" ".join(YOUTUBE_SCOPES),
                                   message=MISSING_CLIENT_SECRETS_MESSAGE)

    storage = Storage("%s-oauth2.json" % sys.argv[0])
    credentials = storage.get()

    if credentials is None or credentials.invalid:
        credentials = run_flow(flow, storage, args)

    http = credentials.authorize(httplib2.Http())

    youtube_analytics = build(YOUTUBE_ANALYTICS_API_SERVICE_NAME,
                              YOUTUBE_ANALYTICS_API_VERSION, http=http)
    return youtube_analytics
def run_analytics_report(youtube_analytics, options, count):
    # Call the Analytics API to retrieve a report. For a list of available
    # reports, see:
    # https://developers.google.com/youtube/analytics/v1/channel_reports
    analytics_query_response = youtube_analytics.reports().query(
        ids="channel==%s" % options.channel_id,
        metrics=options.metrics,
        dimensions=options.dimensions,
        filters=options.filters,
        start_date=options.start_date,
        end_date=options.end_date,
        #max_results=options.max_results,
        sort=options.sort
    ).execute()

    print "Analytics Data for Channel %s" % options.channel_id

    # On the first country, create the file and write the header row.
    if count == 0:
        with open('results.csv', 'w') as csv_out:
            csvWriter = csv.writer(csv_out, delimiter=',', lineterminator='\n')
            headers = [ch["name"] for ch in analytics_query_response.get("columnHeaders", [])]
            headers.append("country")
            csvWriter.writerow(headers)

    # Append the current country's data rows on every call.
    with open('results.csv', 'a') as csv_out:
        csvWriter = csv.writer(csv_out, delimiter=',', lineterminator='\n')
        for row in analytics_query_response.get("rows", []):
            values = []
            for value in row:
                values.append(str(value))
            # options.filters is "country==XX"; characters 9 and 10 hold the country code.
            values.append(options.filters[9] + options.filters[10])
            csvWriter.writerow(values)

    print "Results exported to csv"
'''
for column_header in analytics_query_response.get("columnHeaders", []):
print "%-20s" % column_header["name"],
print
for row in analytics_query_response.get("rows", []):
for value in row:
print "%-20s" % value,
print
'''
if __name__ == "__main__":
    count = 0
    now = datetime.now()
    one_day_ago = (now - timedelta(days=1)).strftime("%Y-%m-%d")
    one_week_ago = (now - timedelta(days=7)).strftime("%Y-%m-%d")

    # countries.csv holds one two-letter (ISO 3166-1 alpha-2) country code per row.
    f = open('countries.csv', 'rb')
    reader = csv.reader(f)

    for row in reader:
        argparser = argparse.ArgumentParser()
        argparser.add_argument("--channel-id", help="Channel ID",
                               default="UCJ5v_MCY6GNUBTO8-D3XoAg")
        argparser.add_argument("--metrics", help="Report metrics",
                               default="views,estimatedMinutesWatched")
        argparser.add_argument("--dimensions", help="Report dimensions",
                               default="deviceType")
        argparser.add_argument("--filters", help="Report filters",
                               default="country==" + ''.join(row))
        argparser.add_argument("--start-date", default=one_week_ago,
                               help="Start date, in YYYY-MM-DD format")
        argparser.add_argument("--end-date", default=one_day_ago,
                               help="End date, in YYYY-MM-DD format")
        #argparser.add_argument("--max-results", help="Max results", default=10)
        argparser.add_argument("--sort", help="Sort order", default="-views")
        args = argparser.parse_args()

        youtube_analytics = get_authenticated_services(args)
        try:
            run_analytics_report(youtube_analytics, args, count)
            count = count + 1
        except HttpError, e:
            print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
I have a short loop updating several gsheets, and I'd like to capture the return code from the requests to the API. I'm using the gspread and gspread_dataframe libraries. I know the call I need to capture the response for is the set_with_dataframe() call, but I'm unable to find anything in the documentation about how to return the response code.
import os
import time
import gspread
import gspread_dataframe as gd
from oauth2client.service_account import ServiceAccountCredentials
os.chdir('C:\\mydir') # locate ourselves in the directory
# import tbls to push to gsheet
import pull_tbls
# example dataframes
tbl_1 = {'first_col': [1,2,3],
'second_col': ['apple', 'orange', 'banana']}
tbl_2 = {'first_col': [4,5,6],
'second_col': ['potato', 'carrot', 'lemon']}
tbl_1 = pull_tbls.tbl_1
tbl_2 = pull_tbls.tbl_2
# set scope
scope = ['https://www.googleapis.com/auth/drive','https://www.googleapis.com/auth/spreadsheets']
# provide credentials
credentials = ServiceAccountCredentials.from_json_keyfile_name('my_json_keyfile.json', scope)
gc = gspread.authorize(credentials)
# list of the google sheets we want to update
sheet_list = ['tbl_1', 'tbl_2']
# loop
for sheet in sheet_list:
    print(sheet)
    sheet = gc.open(sheet)
    # sleep to avoid API limits
    time.sleep(60)
    wks = sheet.sheet1
    # delete existing rows
    wks.clear()
    # when sheet matches tbl name
    # replace sheet contents with tbl
    sheet_name = wks.spreadsheet.title
    if sheet_name == 'tbl_1':
        new_data = tbl_1
    elif sheet_name == 'tbl_2':
        new_data = tbl_2
    else:
        print('sheet name not found!')
    # update gsheet with new data
    if new_data.empty:
        print(sheet_name + ' is empty!')
        continue
    gd.set_with_dataframe(wks, new_data)  # this is the API response I want to capture
    # sleep to avoid API limits
    time.sleep(100)
I'm not sure where in this process I can capture the API response to the gsheet update.
I ended up opening an issue with the maintainer. For anyone looking to see the API response for their gspread_dataframe calls: if you add a logger and include logging.getLogger('gspread_dataframe') in your logger configuration, you should be able to see the results.
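For example, a minimal logging setup along those lines might look like this; the logger name comes from the answer above, while the DEBUG level and the basicConfig format are illustrative assumptions:
import logging

# Send log records to the console; level and format are illustrative choices.
logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(name)s %(levelname)s %(message)s")

# Enable the library's own logger so its request/response details show up.
logging.getLogger('gspread_dataframe').setLevel(logging.DEBUG)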
I'm using the code below to update a Google Sheet with data from a PostgreSQL table. The table refreshes frequently, and I need to update the Google Sheet with the latest data from the table.
I'm new to the Google API; I went through Google posts and did all the steps, like sharing the Google Sheet with the client_email, but it is not working.
There are 3 columns as shown below; the column headers are in the 3rd row, and I need to update the values from the 4th row onwards.
Below is the current code:
import psycopg2
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pprint
#Create scope
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
cnx_psql = psycopg2.connect(host="xxx.xxx.xxx.xx", database="postgres", user="postgres",
password="**********", port="5432")
psql_cursor = cnx_psql.cursor()
meta_query = '''select * from dl.quantity;'''
psql_cursor.execute(meta_query)
results = psql_cursor.fetchall()
cell_values = (results)
creds = ServiceAccountCredentials.from_json_keyfile_name('/Users/User_123/Documents/GS/gsheet_key.json',scope)
client = gspread.authorize(creds)
sheet = client.open('https://docs.google.com/spreadsheets/d/***************').sheet1
pp = pprint.PrettyPrinter()
result = sheet.get_all_records()
for i, val in enumerate(cell_values):
    cell_list[i].value = val
sheet.update_cells(cell_list)
psql_cursor.close()
cnx_psql.close()
I'm getting the error below:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gspread/client.py", line 123, in open
    self.list_spreadsheet_files()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gspread/utils.py", line 37, in finditem
    return next((item for item in seq if func(item)))
StopIteration

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/User_123/Documents/Googlesheet_update.py", line 30, in <module>
    sheet = client.open('https://docs.google.com/spreadsheets/d/********************').sheet1
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/gspread/client.py", line 131, in open
    raise SpreadsheetNotFound
gspread.exceptions.SpreadsheetNotFound
Your code and comments suggest that you are trying to open the spreadsheet using its full URL, but you're using the open function, which only works with titles.
From the docs:
You can open a spreadsheet by its title as it appears in Google Docs:
sh = gc.open('My poor gym results')
If you want to be specific, use a key (which can be extracted from the spreadsheet’s url):
sht1 = gc.open_by_key('0BmgG6nO_6dprdS1MN3d3MkdPa142WFRrdnRRUWl1UFE')
Or, if you feel really lazy to extract that key, paste the entire spreadsheet's url
sht2 = gc.open_by_url('https://docs.google.com/spreadsheet/ccc?key=0Bm...FE&hl')
In your case the last example is the way to go, so use client.open_by_url instead of client.open
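Applied to the code above, the change would look something like this (the URL is the placeholder from the question, not a real spreadsheet):
# Open by the full URL instead of by title; client.open() expects a title.
sheet = client.open_by_url('https://docs.google.com/spreadsheets/d/***************').sheet1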
This snippet of code will allow you to connect; from there you can look at the documentation to complete the rest of your actions.
from oauth2client.service_account import ServiceAccountCredentials
import gspread
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive',
'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/spreadsheets']
#Generate a json file by using service account auth in google developer console
'''
Link: https://console.developers.google.com/
1) Enable API Access for a Project if you haven’t done it yet.
2) Go to “APIs & Services > Credentials” and choose “Create credentials > Service account key”.
3) Fill out the form
4) Click “Create” and “Done”.
5) Press “Manage service accounts” above Service Accounts.
6) Press on ⋮ near recently created service account and select “Manage keys” and then click on “ADD KEY > Create new key”.
7) Select JSON key type and press “Create”.
8) Go to the google sheet and share the sheet with the email from service accounts.
'''
creds = ServiceAccountCredentials.from_json_keyfile_name('mod.json', scope)
client = gspread.authorize(creds)
sheet = client.open_by_url("#Paste your google sheet url here").sheet1
data = sheet.get_all_records()
sheet.update_cell(1, 1, "You made it") #Write this message in first row and first column
print(data)
I am currently looking for a way to upload a video to Azure Media Services (AMS v3) via the Python SDKs. I have followed its instructions and am able to connect to AMS successfully.
Example
credentials = AdalAuthentication(
context.acquire_token_with_client_credentials,
RESOURCE,
CLIENT,
KEY)
client = AzureMediaServices(credentials, SUBSCRIPTION_ID) # Successful
I can also successfully get the details of all the videos uploaded via its portal:
for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0):
    print(f'Asset_name: {data.name}, file_name: {data.description}')
# Asset_name: 4f904060-d15c-4880-8c5a-xxxxxxxx, file_name: 夢想全紀錄.mp4
# Asset_name: 8f2e5e36-d043-4182-9634-xxxxxxxx, file_name: an552Qb_460svvp9.webm
# Asset_name: aef495c1-a3dd-49bb-8e3e-xxxxxxxx, file_name: world_war_2.webm
# Asset_name: b53d8152-6ecd-41a2-a59e-xxxxxxxx, file_name: an552Qb_460svvp9.webm - Media Encoder Standard encoded
However, when I tried to use the following method, it failed, since I have no idea what to pass as parameters (link to the Python SDKs):
create_or_update(resource_group_name, account_name, asset_name,
parameters, custom_headers=None, raw=False, **operation_config)
Therefore, I would like to ask questions as follows (everything is done via Python SDKs):
What kind of parameters does it expect?
Can a video be uploaded directly to AMS, or should it be uploaded to Blob Storage first?
Should an Asset contain only one video, or are multiple files fine?
The documentation for the REST version of that method is at https://learn.microsoft.com/en-us/rest/api/media/assets/createorupdate. This is effectively the same as the Python parameters.
Videos are stored in Azure Storage for Media Services. This is true for input assets, the assets that are encoded, and any streamed content. It is all in Storage but accessed by Media Services. You do need to create an asset in Media Services, which creates the Storage container. Once the Storage container exists, you upload via the Storage APIs to that Media Services-created container.
Technically multiple files are fine, but there are a number of issues with doing so that you may not expect. I'd recommend using 1 input video = 1 Media Services asset. On the encoding output side there will be more than one file in the asset. Encoding output contains one or more videos, manifests, and metadata files.
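For reference, a minimal sketch of creating an asset and reading back its Storage container with the Python SDK might look like this; it assumes the authenticated AzureMediaServices client from the question, and the resource group, account, and asset names are placeholders:
from azure.mgmt.media.models import Asset

# 'client' is the authenticated AzureMediaServices client from the question.
output_asset = client.assets.create_or_update(
    resource_group_name="my-resource-group",   # placeholder
    account_name="my-media-account",           # placeholder
    asset_name="my-output-asset",              # placeholder
    parameters=Asset(description="Encoded output for myvideo.mp4"))

# The Storage container backing the asset, which you upload to via the Storage APIs.
print(output_asset.container)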
I have found a way to work around this using the Python SDKs and REST; however, I am not quite sure it's proper.
Log in to Azure Media Services and Blob Storage via the Python packages:
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.mgmt.media.models import MediaService
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
Create Assets for the original file and the encoded one by passing these parameters. Example of the original file Asset creation:
asset_name = 'asset-myvideo'
asset_properties = {
'properties': {
'description': 'Original File Description',
'storageAccountName': "storage-account-name"
}
}
client.assets.create_or_update(RESOUCE_GROUP_NAME, ACCOUNT_NAME, asset_name, asset_properties)
Upload a video to the Blob Storage derived from the created original asset
# Get the Blob Storage container backing the created asset
current_container = [data.container for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0) if data.name == asset_name][0]

file_name = "myvideo.mp4"
# blob_service_client is a BlobServiceClient built from the storage account's connection string
blob_client = blob_service_client.get_blob_client(container=current_container, blob=file_name)

with open('original_video.mp4', 'rb') as data:
    blob_client.upload_blob(data)
print(f'Video uploaded to {current_container}')
And after that, I do Transform, Job, and Streaming Locator to get the video Streaming Link successfully.
I was able to get this to work with the newer Python SDK. The Python documentation is mostly missing, so I constructed this mainly from the Python SDK source code and the C# examples.
azure-storage-blob==12.3.1
azure-mgmt-media==2.1.0
azure-mgmt-resource==9.0.0
adal~=1.2.2
msrestazure~=0.6.3
0) Import a lot of stuff
from azure.mgmt.media.models import Asset, Transform, Job, \
    BuiltInStandardEncoderPreset, TransformOutput, \
    JobInputAsset, JobOutputAsset, AssetContainerSas, AssetContainerPermission, \
    StreamingLocator, StreamingEndpoint
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.storage.blob import BlobServiceClient, ContainerClient
import datetime as dt
import time
LOGIN_ENDPOINT = AZURE_PUBLIC_CLOUD.endpoints.active_directory
RESOURCE = AZURE_PUBLIC_CLOUD.endpoints.active_directory_resource_id
# AzureSettings is a custom NamedTuple
1) Log in to AMS:
def get_ams_client(settings: AzureSettings) -> AzureMediaServices:
    context = adal.AuthenticationContext(LOGIN_ENDPOINT + '/' +
                                         settings.AZURE_MEDIA_TENANT_ID)
    credentials = AdalAuthentication(
        context.acquire_token_with_client_credentials,
        RESOURCE,
        settings.AZURE_MEDIA_CLIENT_ID,
        settings.AZURE_MEDIA_SECRET
    )
    return AzureMediaServices(credentials, settings.AZURE_SUBSCRIPTION_ID)
2) Create an input and output asset
input_asset = create_or_update_asset(
    input_asset_name, "My Input Asset", client, azure_settings)
output_asset = create_or_update_asset(
    output_asset_name, "My Output Asset", client, azure_settings)
3) Get the container name. (Most documentation refers to BlockBlobService, which seems to have been removed from the SDK.)
def get_container_name(client: AzureMediaServices, asset_name: str, settings: AzureSettings):
    expiry_time = dt.datetime.now(dt.timezone.utc) + dt.timedelta(hours=4)
    container_list: AssetContainerSas = client.assets.list_container_sas(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        asset_name=asset_name,
        permissions=AssetContainerPermission.read_write,
        expiry_time=expiry_time
    )
    sas_uri: str = container_list.asset_container_sas_urls[0]
    container_client: ContainerClient = ContainerClient.from_container_url(sas_uri)
    return container_client.container_name
4) Upload a file to the input asset container:
def upload_file_to_asset_container(
        container: str, local_file, uploaded_file_name, settings: AzureSettings):
    blob_service_client = BlobServiceClient.from_connection_string(settings.AZURE_MEDIA_STORAGE_CONNECTION_STRING)
    blob_client = blob_service_client.get_blob_client(container=container, blob=uploaded_file_name)
    with open(local_file, 'rb') as data:
        blob_client.upload_blob(data)
5) Create a transform (in my case, using the adaptive streaming preset):
def get_or_create_transform(
        client: AzureMediaServices,
        transform_name: str,
        settings: AzureSettings):
    transform_output = TransformOutput(preset=BuiltInStandardEncoderPreset(preset_name="AdaptiveStreaming"))
    transform: Transform = client.transforms.create_or_update(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        transform_name=transform_name,
        outputs=[transform_output]
    )
    return transform
6) Submit the Job:
def submit_job(
        client: AzureMediaServices,
        settings: AzureSettings,
        input_asset: Asset,
        output_asset: Asset,
        transform_name: str,
        correlation_data: dict) -> Job:
    job_input = JobInputAsset(asset_name=input_asset.name)
    job_outputs = [JobOutputAsset(asset_name=output_asset.name)]
    job: Job = client.jobs.create(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        job_name=f"test_job_{UNIQUENESS}",
        transform_name=transform_name,
        parameters=Job(input=job_input,
                       outputs=job_outputs,
                       correlation_data=correlation_data)
    )
    return job
7) Then I get the URLs after the Event Grid has told me the job is done:
# side-effect warning: this starts the streaming endpoint $$$
def get_urls(client: AzureMediaServices, output_asset_name: str,
             locator_name: str):
    try:
        locator: StreamingLocator = client.streaming_locators.create(
            resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
            account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
            streaming_locator_name=locator_name,
            parameters=StreamingLocator(
                asset_name=output_asset_name,
                streaming_policy_name="Predefined_ClearStreamingOnly"
            )
        )
    except Exception as ex:
        print("ignoring existing")

    streaming_endpoint: StreamingEndpoint = client.streaming_endpoints.get(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_endpoint_name="default")

    if streaming_endpoint:
        if streaming_endpoint.resource_state != "Running":
            client.streaming_endpoints.start(
                resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
                account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
                streaming_endpoint_name="default"
            )

    paths = client.streaming_locators.list_paths(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_locator_name=locator_name
    )
    return [f"https://{streaming_endpoint.host_name}{path.paths[0]}" for path in paths.streaming_paths]
Hello, I tried to download captions from a YouTube video with the YouTube Data API.
I customized the example code provided by YouTube.
#!/usr/bin/python
# Usage example:
# python captions.py --videoid='<video_id>' --name='<name>' --file='<file>' --language='<language>' --action='action'
import httplib2
import os
import sys
from apiclient.discovery import build_from_document
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import argparser, run_flow
# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the {{ Google Cloud Console }} at
# {{ https://cloud.google.com/console }}.
# Please ensure that you have enabled the YouTube Data API for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secrets.json"
# This OAuth 2.0 access scope allows for full read/write access to the
# authenticated user's account and requires requests to use an SSL connection.
YOUTUBE_READ_WRITE_SSL_SCOPE = "https://www.googleapis.com/auth/youtube.force-ssl"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the APIs Console
https://console.developers.google.com
For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
CLIENT_SECRETS_FILE))
# Authorize the request and store authorization credentials.
def get_authenticated_service(args):
    flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=YOUTUBE_READ_WRITE_SSL_SCOPE,
                                   message=MISSING_CLIENT_SECRETS_MESSAGE)

    storage = Storage("%s-oauth2.json" % sys.argv[0])
    credentials = storage.get()

    if credentials is None or credentials.invalid:
        credentials = run_flow(flow, storage, args)

    # Trusted testers can download this discovery document from the developers page
    # and it should be in the same directory with the code.
    with open("youtube-v3-api-captions.json", "r", encoding="UTF-8", newline="") as f:
        doc = f.read()
    return build_from_document(doc, http=credentials.authorize(httplib2.Http()))
# Call the API's captions.list method to list the existing caption tracks.
def list_captions(youtube, video_id):
    results = youtube.captions().list(
        part="snippet",
        videoId=video_id
    ).execute()

    for item in results["items"]:
        id = item["id"]
        name = item["snippet"]["name"]
        language = item["snippet"]["language"]
        print("Caption track '%s(%s)' in '%s' language." % (name, id, language))

    return results["items"]
# Call the API's captions.insert method to upload a caption track in draft status.
def upload_caption(youtube, video_id, language, name, file):
    insert_result = youtube.captions().insert(
        part="snippet",
        body=dict(
            snippet=dict(
                videoId=video_id,
                language=language,
                name=name,
                isDraft=True
            )
        ),
        media_body=file
    ).execute()

    id = insert_result["id"]
    name = insert_result["snippet"]["name"]
    language = insert_result["snippet"]["language"]
    status = insert_result["snippet"]["status"]
    print("Uploaded caption track '%s(%s)' in '%s' language, '%s' status." % (name, id, language, status))
# Call the API's captions.update method to update an existing caption track's draft status
# and publish it. If a new binary file is present, update the track with the file as well.
def update_caption(youtube, caption_id, file):
    update_result = youtube.captions().update(
        part="snippet",
        body=dict(
            id=caption_id,
            snippet=dict(
                isDraft=False
            )
        ),
        media_body=file
    ).execute()

    name = update_result["snippet"]["name"]
    isDraft = update_result["snippet"]["isDraft"]
    print("Updated caption track '%s' draft status to be: '%s'" % (name, isDraft))
    if file:
        print("and updated the track with the new uploaded file.")
# Call the API's captions.download method to download an existing caption track.
def download_caption(youtube, caption_id, tfmt):
    subtitle = youtube.captions().download(
        id=caption_id,
        tfmt=tfmt
    ).execute()
    print("First line of caption track: %s" % subtitle)
# Call the API's captions.delete method to delete an existing caption track.
def delete_caption(youtube, caption_id):
    youtube.captions().delete(
        id=caption_id
    ).execute()
    print("Caption track '%s' deleted successfully." % caption_id)
if __name__ == "__main__":
    # The "videoid" option specifies the YouTube video ID that uniquely
    # identifies the video for which the caption track will be uploaded.
    argparser.add_argument("--videoid",
                           help="Required; ID for video for which the caption track will be uploaded.")
    # The "name" option specifies the name of the caption track to be used.
    argparser.add_argument("--name", help="Caption track name", default="YouTube for Developers")
    # The "file" option specifies the binary file to be uploaded as a caption track.
    argparser.add_argument("--file", help="Captions track file to upload")
    # The "language" option specifies the language of the caption track to be uploaded.
    argparser.add_argument("--language", help="Caption track language", default="en")
    # The "captionid" option specifies the ID of the caption track to be processed.
    argparser.add_argument("--captionid", help="Required; ID of the caption track to be processed")
    # The "action" option specifies the action to be processed.
    argparser.add_argument("--action", help="Action", default="all")
    args = argparser.parse_args()

    if args.action in ('upload', 'list', 'all'):
        if not args.videoid:
            exit("Please specify videoid using the --videoid= parameter.")
    if args.action in ('update', 'download', 'delete'):
        if not args.captionid:
            exit("Please specify captionid using the --captionid= parameter.")
    if args.action in ('upload', 'all'):
        if not args.file:
            exit("Please specify a caption track file using the --file= parameter.")
        if not os.path.exists(args.file):
            exit("Please specify a valid file using the --file= parameter.")

    youtube = get_authenticated_service(args)
    try:
        if args.action == 'upload':
            upload_caption(youtube, args.videoid, args.language, args.name, args.file)
        elif args.action == 'list':
            list_captions(youtube, args.videoid)
        elif args.action == 'update':
            update_caption(youtube, args.captionid, args.file)
        elif args.action == 'download':
            download_caption(youtube, args.captionid, 'srt')
        elif args.action == 'delete':
            delete_caption(youtube, args.captionid)
        else:
            # All the available methods are used in sequence just for the sake of an example.
            upload_caption(youtube, args.videoid, args.language, args.name, args.file)
            captions = list_captions(youtube, args.videoid)
            if captions:
                first_caption_id = captions[0]['id']
                update_caption(youtube, first_caption_id, None)
                download_caption(youtube, first_caption_id, 'srt')
                delete_caption(youtube, first_caption_id)
    except Exception as e:
        print(e)
    else:
        print("Created and managed caption tracks.")
If I run the command
python captions.py --videoid='00RxteR1oGQ' --language='en' --action='download'
The result is:
HttpError 404 when requesting https://www.googleapis.com/youtube/v3/captions?part=snippet&alt=json returned "The video identified by the videoId parameter could not be found."
But the video ID I typed does exist.
Many thanks in advance!
#download the package by: pip install pytube
from pytube import YouTube
source = YouTube('https://www.youtube.com/watch?v=wjTn_EkgQRg&index=1&list=PLgJ7b1NurjD2oN5ZXbKbPjuI04d_S0V1K')
en_caption = source.captions.get_by_language_code('en')
en_caption_convert_to_srt =(en_caption.generate_srt_captions())
print(en_caption_convert_to_srt)
#save the caption to a file named Output.txt
text_file = open("Output.txt", "w")
text_file.write(en_caption_convert_to_srt)
text_file.close()
You have to get pytube and import the YouTube class from it:
pip install pytube
Import it and specify the link you need to extract captions from:
from pytube import YouTube
link = YouTube('any video link')
You can get a list of the available captions and their codes using the following code:
#looking for the available captions
av_captions = link.captions
print(av_captions)
Now extract the captions, encode them in XML format, and save them to a flat file using the following code:
# caption codes format is something like this ['en', 'ar', 'fr']
caption = link.captions.get_by_language_code('One of the available caption codes')
xml_caption = caption.xml_captions  # encode in xml format
# saving the captions to a flat file
with open("output.txt", "w", encoding="utf-8") as f:
f.write(xml_caption)
I'm trying to create a document management system using Google Cloud Storage (GCS), Python 2.7, and the Ferris framework. I'm able to upload many types of files into Cloud Storage, and I'm able to programmatically push CSV and TXT files to the client's browser for download with no problem. But if the file is a Microsoft Word document, a PDF, or any other MIME type, I keep getting the following error:
'ascii' codec can't decode byte 0xe2 in position X
The following example works if the user is trying to download a CSV file:
@route
def test_get_csv_file(self):
    # the file in google cloud storage
    thefilename = '/mydomain.appspot.com/my_csv_file.csv'
    try:
        with gcs.open(thefilename, "r") as the_file:
            self.response.headers["Content-Disposition"] = "'attachment'; filename=my_csv_file.csv"
            return the_file.read(32*1024*1024).decode("utf-8")
    except gcs.NotFoundError:
        return "it failed"
The following is an example of trying to push a Word doc which fails with the aforementioned error:
@route
def test_get_word_file(self):
    # the file in google cloud storage
    thefilename = '/mydomain.appspot.com/my_word_file.doc'
    try:
        with gcs.open(thefilename, "r") as the_file:
            self.response.headers["Content-Disposition"] = "'attachment'; filename=my_word_file.doc"
            return the_file.read(32*1024*1024).decode("utf-8")
    except gcs.NotFoundError:
        return "it failed"
Access to the files has to be restricted to the domain account, so I can't set the default ACL of the bucket to public-read; otherwise I would just use the storage.googleapis.com/yadda/yadda URL as the serving URL and be done with it.
I also tried changing the decode value to Latin-1 but that just rendered a blank file. I don't understand why this works with CSV files but not anything else.
I appreciate any assistance. Thanks
It doesn't actually solve your problem, but an alternative approach is to use signed URLs. The files would then be served directly from Cloud Storage, and the generated URL would be valid for a limited time.
I use the Python module below. It has some utility methods and classes for URL signing.
import datetime
import time
import urllib
from urlparse import urlparse
__author__ = 'fabio'
__all__ = ['sign', 'PolicyDocument', 'CloudStorageURLSigner']
from google.appengine.api import app_identity
from base64 import b64encode
import json
def sign(string_to_sign):
    signing_key_name, signature = app_identity.sign_blob(string_to_sign)
    return b64encode(signature)
class PolicyDocument:
    """Represents a policy.

    Attributes:
        content_type:
        success_action_redirect:
        key:
        bucket:
        expiration:
        acl:
        success_action_status:
    """

    ACL = "acl"
    SUCCESS_ACTION_REDIRECT = "success_action_redirect"
    SUCCESS_ACTION_STATUS = "success_action_status"
    KEY = "key"
    BUCKET = "bucket"
    CONTENT_TYPE = "content-type"
    ACL_PUBLIC_READ = "public-read"
    ACL_PROJECT_PRIVATE = "project-private"

    def __init__(self, content_type=None, success_action_redirect=None, key=None, bucket=None, expiration=None,
                 success_action_status=201, acl=ACL_PROJECT_PRIVATE):
        self.content_type = content_type
        self.success_action_redirect = success_action_redirect
        self.key = key
        self.bucket = bucket
        self.expiration = expiration
        self.acl = acl
        self.success_action_status = success_action_status

    def as_dict(self):
        conditions = [{self.ACL: self.acl},
                      {self.BUCKET: self.bucket},
                      {self.KEY: self.key},
                      {self.CONTENT_TYPE: self.content_type},
                      ["starts-with", "$content-type", 'image/'],
                      ]

        # TODO investigate why its not working
        if self.success_action_redirect:
            conditions.append({self.SUCCESS_ACTION_REDIRECT: self.success_action_redirect})
        else:
            conditions.append({self.SUCCESS_ACTION_STATUS: str(self.success_action_status)})

        return dict(expiration=self.expiration, conditions=conditions)

    def as_json_b64encode(self):
        return b64encode(self.as_json())

    def as_json(self):
        return json.dumps(self.as_dict())
class CloudStorageURLSigner(object):
    """Contains methods for generating signed URLs for Google Cloud Storage."""

    DEFAULT_GCS_API_ENDPOINT = 'https://storage.googleapis.com'

    def __init__(self, gcs_api_endpoint=None, expiration=None):
        """Creates a CloudStorageURLSigner that can be used to access signed URLs.

        Args:
            gcs_api_endpoint: Base URL for GCS API. Default is 'https://storage.googleapis.com'
            expiration: An instance of datetime.datetime containing the time when the
                signed URL should expire.
        """
        self.gcs_api_endpoint = gcs_api_endpoint or self.DEFAULT_GCS_API_ENDPOINT
        self.expiration = expiration or (datetime.datetime.now() +
                                         datetime.timedelta(days=1))
        self.expiration = int(time.mktime(self.expiration.timetuple()))
        self.client_id_email = app_identity.get_service_account_name()

    def __make_signature_string(self, verb, path, content_md5, content_type):
        """Creates the signature string for signing according to GCS docs."""
        signature_string = ('{verb}\n'
                            '{content_md5}\n'
                            '{content_type}\n'
                            '{expiration}\n'
                            '{resource}')
        return signature_string.format(verb=verb,
                                       content_md5=content_md5,
                                       content_type=content_type,
                                       expiration=self.expiration,
                                       resource=path)

    def signed_url(self, verb, path, content_type='', content_md5=''):
        """Forms and returns the full signed URL to access GCS."""
        base_url = '%s%s' % (self.gcs_api_endpoint, path)
        signature_string = self.__make_signature_string(verb, path, content_md5,
                                                        content_type)
        signature = urllib.quote_plus(sign(signature_string))
        return "{}?GoogleAccessId={}&Expires={}&Signature={}".format(base_url, self.client_id_email,
                                                                     str(self.expiration), signature)

    def signed_download_url(self, url):
        if self.is_stored_on_google_cloud_storage(url):
            parsed_url = urlparse(url)
            return self.signed_url('GET', parsed_url.path)
        return url

    @staticmethod
    def is_stored_on_google_cloud_storage(url):
        return "storage.googleapis.com" in url