Python: Downloading captions from YouTube

Hello, I tried to download captions from a YouTube video with the YouTube Data API.
I customized the example code provided by YouTube.
#!/usr/bin/python
# Usage example:
# python captions.py --videoid='<video_id>' --name='<name>' --file='<file>' --language='<language>' --action='action'
import httplib2
import os
import sys
from apiclient.discovery import build_from_document
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import argparser, run_flow
# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the {{ Google Cloud Console }} at
# {{ https://cloud.google.com/console }}.
# Please ensure that you have enabled the YouTube Data API for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secrets.json"
# This OAuth 2.0 access scope allows for full read/write access to the
# authenticated user's account and requires requests to use an SSL connection.
YOUTUBE_READ_WRITE_SSL_SCOPE = "https://www.googleapis.com/auth/youtube.force-ssl"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the APIs Console
https://console.developers.google.com
For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
CLIENT_SECRETS_FILE))
# Authorize the request and store authorization credentials.
def get_authenticated_service(args):
flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=YOUTUBE_READ_WRITE_SSL_SCOPE,
message=MISSING_CLIENT_SECRETS_MESSAGE)
storage = Storage("%s-oauth2.json" % sys.argv[0])
credentials = storage.get()
if credentials is None or credentials.invalid:
credentials = run_flow(flow, storage, args)
# Trusted testers can download this discovery document from the developers page
# and it should be in the same directory with the code.
with open("youtube-v3-api-captions.json", "r", encoding="UTF-8", newline="") as f:
doc = f.read()
return build_from_document(doc, http=credentials.authorize(httplib2.Http()))
# Call the API's captions.list method to list the existing caption tracks.
def list_captions(youtube, video_id):
results = youtube.captions().list(
part="snippet",
videoId=video_id
).execute()
for item in results["items"]:
id = item["id"]
name = item["snippet"]["name"]
language = item["snippet"]["language"]
print ("Caption track '%s(%s)' in '%s' language.") % (name, id, language)
return results["items"]
# Call the API's captions.insert method to upload a caption track in draft status.
def upload_caption(youtube, video_id, language, name, file):
insert_result = youtube.captions().insert(
part="snippet",
body=dict(
snippet=dict(
videoId=video_id,
language=language,
name=name,
isDraft=True
)
),
media_body=file
).execute()
id = insert_result["id"]
name = insert_result["snippet"]["name"]
language = insert_result["snippet"]["language"]
status = insert_result["snippet"]["status"]
print ("Uploaded caption track '%s(%s) in '%s' language, '%s' status.") % (name,
id, language, status)
# Call the API's captions.update method to update an existing caption track's draft status
# and publish it. If a new binary file is present, update the track with the file as well.
def update_caption(youtube, caption_id, file):
update_result = youtube.captions().update(
part="snippet",
body=dict(
id=caption_id,
snippet=dict(
isDraft=False
)
),
media_body=file
).execute()
name = update_result["snippet"]["name"]
isDraft = update_result["snippet"]["isDraft"]
print ("Updated caption track '%s' draft status to be: '%s'") % (name, isDraft)
if file:
print ("and updated the track with the new uploaded file.")
# Call the API's captions.download method to download an existing caption track.
def download_caption(youtube, caption_id, tfmt):
subtitle = youtube.captions().download(
id=caption_id,
tfmt=tfmt
).execute()
print ("First line of caption track: %s") % (subtitle)
# Call the API's captions.delete method to delete an existing caption track.
def delete_caption(youtube, caption_id):
youtube.captions().delete(
id=caption_id
).execute()
print ("caption track '%s' deleted succesfully") % (caption_id)
if __name__ == "__main__":
# The "videoid" option specifies the YouTube video ID that uniquely
# identifies the video for which the caption track will be uploaded.
argparser.add_argument("--videoid",
help="Required; ID for video for which the caption track will be uploaded.")
# The "name" option specifies the name of the caption trackto be used.
argparser.add_argument("--name", help="Caption track name", default="YouTube for Developers")
# The "file" option specifies the binary file to be uploaded as a caption track.
argparser.add_argument("--file", help="Captions track file to upload")
# The "language" option specifies the language of the caption track to be uploaded.
argparser.add_argument("--language", help="Caption track language", default="en")
# The "captionid" option specifies the ID of the caption track to be processed.
argparser.add_argument("--captionid", help="Required; ID of the caption track to be processed")
# The "action" option specifies the action to be processed.
argparser.add_argument("--action", help="Action", default="all")
args = argparser.parse_args()
if (args.action in ('upload', 'list', 'all')):
if not args.videoid:
exit("Please specify videoid using the --videoid= parameter.")
if (args.action in ('update', 'download', 'delete')):
if not args.captionid:
exit("Please specify captionid using the --captionid= parameter.")
if (args.action in ('upload', 'all')):
if not args.file:
exit("Please specify a caption track file using the --file= parameter.")
if not os.path.exists(args.file):
exit("Please specify a valid file using the --file= parameter.")
youtube = get_authenticated_service(args)
try:
if args.action == 'upload':
upload_caption(youtube, args.videoid, args.language, args.name, args.file)
elif args.action == 'list':
list_captions(youtube, args.videoid)
elif args.action == 'update':
update_caption(youtube, args.captionid, args.file)
elif args.action == 'download':
download_caption(youtube, args.captionid, 'srt')
elif args.action == 'delete':
delete_caption(youtube, args.captionid)
else:
# All the available methods are used in sequence just for the sake of an example.
upload_caption(youtube, args.videoid, args.language, args.name, args.file)
captions = list_captions(youtube, args.videoid)
if captions:
first_caption_id = captions[0]['id']
update_caption(youtube, first_caption_id, None)
download_caption(youtube, first_caption_id, 'srt')
delete_caption(youtube, first_caption_id)
except Exception as e:
print (e)
else:
print ("Created and managed caption tracks.")
If I run the command
python captions.py --videoid='00RxteR1oGQ' --language='en' --action='download'
The result is:
HttpError 404 when requesting https://www.googleapis.com/youtube/v3/captions?part=snippet&alt=json returned "The video identified by the videoId parameter could not be found."
But the video ID I typed definitely exists.
Many thanks in advance!

# Install the package first: pip install pytube
from pytube import YouTube
source = YouTube('https://www.youtube.com/watch?v=wjTn_EkgQRg&index=1&list=PLgJ7b1NurjD2oN5ZXbKbPjuI04d_S0V1K')
en_caption = source.captions.get_by_language_code('en')
en_caption_convert_to_srt =(en_caption.generate_srt_captions())
print(en_caption_convert_to_srt)
# Save the caption to a file named Output.txt (UTF-8 so non-ASCII characters survive)
with open("Output.txt", "w", encoding="utf-8") as text_file:
    text_file.write(en_caption_convert_to_srt)
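Note that get_by_language_code returns None when the video has no track for that code, so generate_srt_captions() would then fail. A small defensive sketch using the same pytube calls as above (the URL is only an example):
from pytube import YouTube

source = YouTube('https://www.youtube.com/watch?v=wjTn_EkgQRg')
print(source.captions)  # shows which caption tracks pytube found
en_caption = source.captions.get_by_language_code('en')
if en_caption is None:
    raise SystemExit("No English caption track on this video")
with open("Output.srt", "w", encoding="utf-8") as f:
    f.write(en_caption.generate_srt_captions())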

You have to install pytube and import the YouTube class from it:
pip install pytube
Import it and specify the link of the video you need to extract captions from:
from pytube import YouTube
link = YouTube('any video link')
You can get a list of the available captions and their codes using the following code:
#looking for the available captions
av_captions = link.captions
print(av_captions)
Now extract the captions, encode them in XML format, and save them to a flat file using the following code:
# caption codes format is something like this ['en', 'ar', 'fr']
caption = link.captions.get_by_language_code('One of the available caption codes')
xml_caption = caption.xml_captions #encode in xml format
# Save the captions to a flat file
with open("output.txt", "w", encoding="utf-8") as f:
f.write(xml_caption)

Related

Disabling Billing on Google Cloud Using Google Cloud Function (KeyError: 'data')

I am attempting to write a Google Cloud Function to set caps to disable usage above a certain limit. I followed the instructions here: https://cloud.google.com/billing/docs/how-to/notify#cap_disable_billing_to_stop_usage.
This is what my cloud function looks like (I am just copying and pasting from the Google Cloud docs page linked above):
import base64
import json
import os
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
PROJECT_ID = os.getenv('GCP_PROJECT')
PROJECT_NAME = f'projects/{PROJECT_ID}'
def stop_billing(data, context):
pubsub_data = base64.b64decode(data['data']).decode('utf-8')
pubsub_json = json.loads(pubsub_data)
cost_amount = pubsub_json['costAmount']
budget_amount = pubsub_json['budgetAmount']
if cost_amount <= budget_amount:
print(f'No action necessary. (Current cost: {cost_amount})')
return
billing = discovery.build(
'cloudbilling',
'v1',
cache_discovery=False,
credentials=GoogleCredentials.get_application_default()
)
projects = billing.projects()
if __is_billing_enabled(PROJECT_NAME, projects):
print(__disable_billing_for_project(PROJECT_NAME, projects))
else:
print('Billing already disabled')
def __is_billing_enabled(project_name, projects):
"""
Determine whether billing is enabled for a project
@param {string} project_name Name of project to check if billing is enabled
@return {bool} Whether project has billing enabled or not
"""
res = projects.getBillingInfo(name=project_name).execute()
return res['billingEnabled']
def __disable_billing_for_project(project_name, projects):
"""
Disable billing for a project by removing its billing account
@param {string} project_name Name of project to disable billing on
@return {string} Text containing response from disabling billing
"""
body = {'billingAccountName': ''} # Disable billing
res = projects.updateBillingInfo(name=project_name, body=body).execute()
print(f'Billing disabled: {json.dumps(res)}')
Also attaching a screenshot of what it looks like in the Google Cloud Functions UI:
I'm also attaching a screenshot to show that I copied and pasted the relevant things to the requirements.txt file as well.
But when I go to test the code, it gives me an error:
{
insertId: "000000-69dce50a-e079-45ed-b949-a241c97fdfe4"
labels: {…}
logName: "projects/stanford-cs-231n/logs/cloudfunctions.googleapis.com%2Fcloud-functions"
receiveTimestamp: "2020-02-06T16:24:26.800908134Z"
resource: {…}
severity: "ERROR"
textPayload: "Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 383, in run_background_function
_function_handler.invoke_user_function(event_object)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function
return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 214, in call_user_function
event_context.Context(**request_or_event.context))
File "/user_code/main.py", line 9, in stop_billing
pubsub_data = base64.b64decode(data['data']).decode('utf-8')
KeyError: 'data'
"
timestamp: "2020-02-06T16:24:25.411Z"
trace: "projects/stanford-cs-231n/traces/8e106d5ab629141d5d91b6b68fb30c82"
}
Any idea why?
Relevant Stack Overflow Post: https://stackoverflow.com/a/58673874/3507127
There seems to be an error in the code Google provided. I got it working when I changed the stop_billing function:
def stop_billing(data, context):
if 'data' in data.keys():
pubsub_data = base64.b64decode(data['data']).decode('utf-8')
pubsub_json = json.loads(pubsub_data)
cost_amount = pubsub_json['costAmount']
budget_amount = pubsub_json['budgetAmount']
else:
cost_amount = data['costAmount']
budget_amount = data['budgetAmount']
if cost_amount <= budget_amount:
print(f'No action necessary. (Current cost: {cost_amount})')
return
if PROJECT_ID is None:
print('No project specified with environment variable')
return
billing = discovery.build('cloudbilling', 'v1', cache_discovery=False)
projects = billing.projects()
billing_enabled = __is_billing_enabled(PROJECT_NAME, projects)
if billing_enabled:
__disable_billing_for_project(PROJECT_NAME, projects)
else:
print('Billing already disabled')
The problem is that the Pub/Sub message provides its input as a JSON message with a 'data' entry that is base64 encoded. In the testing functionality you provide the JSON entry without a 'data' key and without encoding it. The rewritten function above checks for both cases.
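For local testing it can help to build an event shaped like what Pub/Sub actually delivers, i.e. a dict with a base64-encoded JSON string under the 'data' key. A quick sketch (the cost and budget numbers are made up):
import base64
import json

# Hypothetical budget notification, shaped like a Cloud Billing budget alert payload
notification = {'costAmount': 120.0, 'budgetAmount': 100.0}
event = {'data': base64.b64encode(json.dumps(notification).encode('utf-8'))}
stop_billing(event, None)  # the context argument is not used by the function above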

Uploading a Video to Azure Media Services with Python SDKs

I am currently looking for a way to upload a video to Azure Media Services (AMS v3) via the Python SDKs. I have followed its instructions and am able to connect to AMS successfully.
Example
credentials = AdalAuthentication(
context.acquire_token_with_client_credentials,
RESOURCE,
CLIENT,
KEY)
client = AzureMediaServices(credentials, SUBSCRIPTION_ID) # Successful
I can also successfully get the details of all the videos uploaded via its portal:
for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0):
print(f'Asset_name: {data.name}, file_name: {data.description}')
# Asset_name: 4f904060-d15c-4880-8c5a-xxxxxxxx, file_name: 夢想全紀錄.mp4
# Asset_name: 8f2e5e36-d043-4182-9634-xxxxxxxx, file_name: an552Qb_460svvp9.webm
# Asset_name: aef495c1-a3dd-49bb-8e3e-xxxxxxxx, file_name: world_war_2.webm
# Asset_name: b53d8152-6ecd-41a2-a59e-xxxxxxxx, file_name: an552Qb_460svvp9.webm - Media Encoder Standard encoded
However, when I tried to use the following method, it failed, since I have no idea what to pass as parameters - Link to Python SDKs
create_or_update(resource_group_name, account_name, asset_name,
parameters, custom_headers=None, raw=False, **operation_config)
Therefore, I would like to ask questions as follows (everything is done via Python SDKs):
What kind of parameters does it expect?
Can a video be uploaded directly to AMS, or should it be uploaded to Blob Storage first?
Should an Asset contain only one video, or are multiple files fine?
The documentation for the REST version of that method is at https://learn.microsoft.com/en-us/rest/api/media/assets/createorupdate. This is effectively the same as the Python parameters.
Videos are stored in Azure Storage for Media Services. This is true for input assets, the assets that are encoded, and any streamed content. It all is in Storage but accessed by Media Services. You do need to create an asset in Media Services which creates the Storage container. Once the Storage container exists you upload via the Storage APIs to that Media Services created container.
Technically multiple files are fine, but there are a number of issues with doing that that you may not expect. I'd recommend using 1 input video = 1 Media Services asset. On the encoding output side there will be more than one file in the asset. Encoding output contains one or more videos, manifests, and metadata files.
I have found a method that works using the Python SDKs and REST; however, I am not quite sure it's proper.
Log in to Azure Media Services and Blob Storage via Python packages:
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.mgmt.media.models import MediaService
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
Create Assets for an original file and an encoded one by passing these parameters. Example of the original file Asset creation:
asset_name = 'asset-myvideo'
asset_properties = {
'properties': {
'description': 'Original File Description',
'storageAccountName': "storage-account-name"
}
}
client.assets.create_or_update(RESOUCE_GROUP_NAME, ACCOUNT_NAME, asset_name, asset_properties)
Upload a video to the Blob Storage container derived from the created original asset:
current_container = [data.container for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0) if data.name == asset_name][0] # Get Blob Storage location
file_name = "myvideo.mp4"
blob_client = blob_service_client.get_blob_client(container=current_container, blob=file_name)
with open('original_video.mp4', 'rb') as data:
blob_client.upload_blob(data)
print(f'Video uploaded to {current_container}')
And after that, I do Transform, Job, and Streaming Locator to get the video Streaming Link successfully.
I was able to get this to work with the newer Python SDK. The Python documentation is mostly missing, so I constructed this mainly from the Python SDK source code and the C# examples.
azure-storage-blob==12.3.1
azure-mgmt-media==2.1.0
azure-mgmt-resource==9.0.0
adal~=1.2.2
msrestazure~=0.6.3
0) Import a lot of stuff
from azure.mgmt.media.models import Asset, Transform, Job, \
    BuiltInStandardEncoderPreset, TransformOutput, \
    JobInputAsset, JobOutputAsset, AssetContainerSas, AssetContainerPermission, \
    StreamingLocator, StreamingEndpoint  # StreamingLocator/StreamingEndpoint are used in step 7 below
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.storage.blob import BlobServiceClient, ContainerClient
import datetime as dt
import time
LOGIN_ENDPOINT = AZURE_PUBLIC_CLOUD.endpoints.active_directory
RESOURCE = AZURE_PUBLIC_CLOUD.endpoints.active_directory_resource_id
# AzureSettings is a custom NamedTuple
1) Log in to AMS:
def get_ams_client(settings: AzureSettings) -> AzureMediaServices:
context = adal.AuthenticationContext(LOGIN_ENDPOINT + '/' +
settings.AZURE_MEDIA_TENANT_ID)
credentials = AdalAuthentication(
context.acquire_token_with_client_credentials,
RESOURCE,
settings.AZURE_MEDIA_CLIENT_ID,
settings.AZURE_MEDIA_SECRET
)
return AzureMediaServices(credentials, settings.AZURE_SUBSCRIPTION_ID)
2) Create an input and output asset
input_asset = create_or_update_asset(
    input_asset_name, "My Input Asset", client, azure_settings)
output_asset = create_or_update_asset(
    output_asset_name, "My Output Asset", client, azure_settings)
3) Get the container name. (Most documentation refers to BlockBlobService, which seems to have been removed from the SDK.)
def get_container_name(client: AzureMediaServices, asset_name: str, settings: AzureSettings):
expiry_time = dt.datetime.now(dt.timezone.utc) + dt.timedelta(hours=4)
container_list: AssetContainerSas = client.assets.list_container_sas(
resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
asset_name=asset_name,
permissions = AssetContainerPermission.read_write,
expiry_time=expiry_time
)
sas_uri: str = container_list.asset_container_sas_urls[0]
container_client: ContainerClient = ContainerClient.from_container_url(sas_uri)
return container_client.container_name
4) Upload a file to the input asset container:
def upload_file_to_asset_container(
container: str, local_file, uploaded_file_name, settings: AzureSettings):
blob_service_client = BlobServiceClient.from_connection_string(settings.AZURE_MEDIA_STORAGE_CONNECTION_STRING)
blob_client = blob_service_client.get_blob_client(container=container, blob=uploaded_file_name)
with open(local_file, 'rb') as data:
blob_client.upload_blob(data)
5) Create a transform (in my case, using the adaptive streaming preset):
def get_or_create_transform(
client: AzureMediaServices,
transform_name: str,
settings: AzureSettings):
transform_output = TransformOutput(preset=BuiltInStandardEncoderPreset(preset_name="AdaptiveStreaming"))
transform: Transform = client.transforms.create_or_update(
resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
transform_name=transform_name,
outputs=[transform_output]
)
return transform
6) Submit the job:
def submit_job(
client: AzureMediaServices,
settings: AzureSettings,
input_asset: Asset,
output_asset: Asset,
transform_name: str,
correlation_data: dict) -> Job:
job_input = JobInputAsset(asset_name=input_asset.name)
job_outputs = [JobOutputAsset(asset_name=output_asset.name)]
job: Job = client.jobs.create(
resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
job_name=f"test_job_{UNIQUENESS}",
transform_name=transform_name,
parameters=Job(input=job_input,
outputs=job_outputs,
correlation_data=correlation_data)
)
return job
7) Then I get the URLs after Event Grid has told me the job is done:
# side-effect warning: this starts the streaming endpoint $$$
def get_urls(client: AzureMediaServices, output_asset_name: str,
             locator_name: str, settings: AzureSettings):
try:
locator: StreamingLocator = client.streaming_locators.create(
resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
streaming_locator_name=locator_name,
parameters=StreamingLocator(
asset_name=output_asset_name,
streaming_policy_name="Predefined_ClearStreamingOnly"
)
)
except Exception as ex:
print("ignoring existing")
streaming_endpoint: StreamingEndpoint = client.streaming_endpoints.get(
resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
streaming_endpoint_name="default")
if streaming_endpoint:
if streaming_endpoint.resource_state != "Running":
client.streaming_endpoints.start(
resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
streaming_endpoint_name="default"
)
paths = client.streaming_locators.list_paths(
resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
streaming_locator_name=locator_name
)
return [f"https://{streaming_endpoint.host_name}{path.paths[0]}" for path in paths.streaming_paths]

Add a Loop to an existing YouTube API Python script

As confirmed with YouTube Support, unfortunately the way the YouTube API is set up, you can't pull both Country and Device Type as dimensions at the same time; the only work-around is to pull a Device Type report and add one country at a time in the filter.
Therefore, you need to fire an API call for each country, which can be accomplished with a loop that iterates the query over every possible country.
I managed to script the code for the API itself, but I need help with the loop over all possible countries (whether it's through an API call getting the full country list or simply by referencing a CSV file with the country list).
FYI, it is not possible to use device type as a filter for a country report.
Can anyone please give me a hand in doing that? Below please find my Python code.
https://1drv.ms/u/s!AlgTM2giFod43mzV1dQARcvsB81o
I was able to answer my own question; this is the revised code in case anyone is interested:
https://1drv.ms/u/s!AlgTM2giFod43m3plTrfSRHOfaCz
#!/usr/bin/python
from datetime import datetime, timedelta
import httplib2
import os
import sys
import csv
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
import argparse
from oauth2client.tools import argparser, run_flow
# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the Google Developers Console at
# https://console.developers.google.com/.
# Please ensure that you have enabled the YouTube Data and YouTube Analytics
# APIs for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secretXYZ"
# These OAuth 2.0 access scopes allow for read-only access to the authenticated
# user's account for both YouTube Data API resources and YouTube Analytics Data.
YOUTUBE_SCOPES = ["https://www.googleapis.com/auth/youtube.readonly",
"https://www.googleapis.com/auth/yt-analytics.readonly"]
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
YOUTUBE_ANALYTICS_API_SERVICE_NAME = "youtubeAnalytics"
YOUTUBE_ANALYTICS_API_VERSION = "v1"
# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the Developers Console
https://console.developers.google.com/
For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
CLIENT_SECRETS_FILE))
def get_authenticated_services(args):
flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE,
scope=" ".join(YOUTUBE_SCOPES),
message=MISSING_CLIENT_SECRETS_MESSAGE)
storage = Storage("%s-oauth2.json" % sys.argv[0])
credentials = storage.get()
if credentials is None or credentials.invalid:
credentials = run_flow(flow, storage, args)
http = credentials.authorize(httplib2.Http())
youtube_analytics = build(YOUTUBE_ANALYTICS_API_SERVICE_NAME,
YOUTUBE_ANALYTICS_API_VERSION, http=http)
return youtube_analytics
def run_analytics_report(youtube_analytics, options, count):
# Call the Analytics API to retrieve a report. For a list of available
# reports, see:
# https://developers.google.com/youtube/analytics/v1/channel_reports
analytics_query_response = youtube_analytics.reports().query(
ids="channel==%s" % options.channel_id,
metrics=options.metrics,
dimensions=options.dimensions,
filters=options.filters,
start_date=options.start_date,
end_date=options.end_date,
#max_results=options.max_results,
sort=options.sort
).execute()
print "Analytics Data for Channel %s" % options.channel_id
if count == 0:
with open('results.csv', 'w') as csv_out:
csvWriter=csv.writer(csv_out, delimiter=',', lineterminator = '\n')
headers = [ch["name"] for ch in analytics_query_response.get("columnHeaders", [])]
headers.append("country")
csvWriter.writerow(headers)
else:
with open('results.csv', 'a') as csv_out:
csvWriter=csv.writer(csv_out, delimiter=',', lineterminator = '\n')
for row in analytics_query_response.get("rows", []):
values = []
for value in row:
values.append(str(value))
values.append((options.filters[9]+""+options.filters[10]))
csvWriter.writerow(values)
print "Results exported to csv"
'''
for column_header in analytics_query_response.get("columnHeaders", []):
print "%-20s" % column_header["name"],
print
for row in analytics_query_response.get("rows", []):
for value in row:
print "%-20s" % value,
print
'''
if __name__ == "__main__":
count = 0
now = datetime.now()
one_day_ago = (now - timedelta(days=1)).strftime("%Y-%m-%d")
one_week_ago = (now - timedelta(days=7)).strftime("%Y-%m-%d")
f = open('countries.csv', 'rb')
reader = csv.reader(f)
for row in reader:
argparser = argparse.ArgumentParser()
argparser.add_argument("--channel-id", help="Channel ID",
default="UCJ5v_MCY6GNUBTO8-D3XoAg")
argparser.add_argument("--metrics", help="Report metrics",
default="views,estimatedMinutesWatched")
argparser.add_argument("--dimensions", help="Report dimensions",
default="deviceType")
argparser.add_argument("--filters", help="Report filters",
default="country==" + ''.join(row))
argparser.add_argument("--start-date", default=one_week_ago,
help="Start date, in YYYY-MM-DD format")
argparser.add_argument("--end-date", default=one_day_ago,
help="End date, in YYYY-MM-DD format")
#argparser.add_argument("--max-results", help="Max results", default=10)
argparser.add_argument("--sort", help="Sort order", default="-views")
args = argparser.parse_args()
youtube_analytics = get_authenticated_services(args)
try:
run_analytics_report(youtube_analytics, args, count)
count = count + 1
except HttpError, e:
print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)

How do I push different file types in Google Cloud Storage to the client browser?

I'm trying to create a document management system using Google Cloud Storage (GCS), Python 2.7, and the Ferris framework. I'm able to upload many types of files into Cloud Storage, and I'm able to programmatically push CSV and TXT files to the client's browser for download with no problem. But if the file is a Microsoft Word document or a PDF or any other mime-type, I keep getting the following error:
'ascii' codec can't decode byte 0xe2 in position X
The following example works if the user is trying to download a CSV file:
@route
def test_get_csv_file(self):
# the file in google cloud storage
thefilename = '/mydomain.appspot.com/my_csv_file.csv'
try:
with gcs.open(thefilename, "r") as the_file:
self.response.headers["Content-Disposition"] = "'attachment'; filename=my_csv_file.csv"
return the_file.read(32*1024*1024).decode("utf-8")
except gcs.NotFoundError:
return "it failed"
The following is an example of trying to push a Word doc which fails with the aforementioned error:
@route
def test_get_word_file(self):
# the file in google cloud storage
thefilename = '/mydomain.appspot.com/my_word_file.doc'
try:
with gcs.open(thefilename, "r") as the_file:
self.response.headers["Content-Disposition"] = "'attachment'; filename=my_word_file.doc"
return the_file.read(32*1024*1024).decode("utf-8")
except gcs.NotFoundError:
return "it failed"
Access to the files has to be restricted to the domain account so I can't set the default ACL of the bucket to public-read, otherwise I would just use the storage.googlapis.com/yadda/yadda URL as the serving url and be done with it.
I also tried changing the decode value to Latin-1 but that just rendered a blank file. I don't understand why this works with CSV files but not anything else.
I appreciate any assistance. Thanks
It doesn't actually solve your problem, but an alternative approach is to use signed URLs. The files would then be served directly from Cloud Storage and the generated URL would be valid for a limited time.
I use the Python module below. It has some utility methods and classes for URL signing.
import datetime
import time
import urllib
from urlparse import urlparse
__author__ = 'fabio'
__all__ = ['sign', 'PolicyDocument', 'CloudStorageURLSigner']
from google.appengine.api import app_identity
from base64 import b64encode
import json
def sign(string_to_sign):
signing_key_name, signature = app_identity.sign_blob(string_to_sign)
return b64encode(signature)
class PolicyDocument:
"""Represents a policy.
Attributes:
content_type:
success_action_redirect:
key:
bucket:
expiration:
acl:
success_action_status:
"""
ACL = "acl"
SUCCESS_ACTION_REDIRECT = "success_action_redirect"
SUCCESS_ACTION_STATUS = "success_action_status"
KEY = "key"
BUCKET = "bucket"
CONTENT_TYPE = "content-type"
ACL_PUBLIC_READ = "public-read"
ACL_PROJECT_PRIVATE = "project-private"
def __init__(self, content_type=None, success_action_redirect=None, key=None, bucket=None, expiration=None,
success_action_status=201, acl=ACL_PROJECT_PRIVATE):
self.content_type = content_type
self.success_action_redirect = success_action_redirect
self.key = key
self.bucket = bucket
self.expiration = expiration
self.acl = acl
self.success_action_status = success_action_status
def as_dict(self):
conditions = [{self.ACL: self.acl},
{self.BUCKET: self.bucket},
{self.KEY: self.key},
{self.CONTENT_TYPE: self.content_type},
["starts-with", "$content-type", 'image/'],
]
# TODO investigate why its not working
if self.success_action_redirect:
conditions.append({self.SUCCESS_ACTION_REDIRECT: self.success_action_redirect})
else:
conditions.append({self.SUCCESS_ACTION_STATUS: str(self.success_action_status)})
return dict(expiration=self.expiration, conditions=conditions)
def as_json_b64encode(self):
return b64encode(self.as_json())
def as_json(self):
return json.dumps(self.as_dict())
class CloudStorageURLSigner(object):
"""Contains methods for generating signed URLs for Google Cloud Storage."""
DEFAULT_GCS_API_ENDPOINT = 'https://storage.googleapis.com'
def __init__(self, gcs_api_endpoint=None, expiration=None):
"""Creates a CloudStorageURLSigner that can be used to access signed URLs.
Args:
gcs_api_endpoint: Base URL for GCS API. Default is 'https://storage.googleapis.com'
expiration: An instance of datetime.datetime containing the time when the
signed URL should expire.
"""
self.gcs_api_endpoint = gcs_api_endpoint or self.DEFAULT_GCS_API_ENDPOINT
self.expiration = expiration or (datetime.datetime.now() +
datetime.timedelta(days=1))
self.expiration = int(time.mktime(self.expiration.timetuple()))
self.client_id_email = app_identity.get_service_account_name()
def __make_signature_string(self, verb, path, content_md5, content_type):
"""Creates the signature string for signing according to GCS docs."""
signature_string = ('{verb}\n'
'{content_md5}\n'
'{content_type}\n'
'{expiration}\n'
'{resource}')
return signature_string.format(verb=verb,
content_md5=content_md5,
content_type=content_type,
expiration=self.expiration,
resource=path)
def signed_url(self, verb, path, content_type='', content_md5=''):
"""Forms and returns the full signed URL to access GCS."""
base_url = '%s%s' % (self.gcs_api_endpoint, path)
signature_string = self.__make_signature_string(verb, path, content_md5,
content_type)
signature = urllib.quote_plus(sign(signature_string))
return "{}?GoogleAccessId={}&Expires={}&Signature={}".format(base_url, self.client_id_email,
str(self.expiration), signature)
def signed_download_url(self, url):
if self.is_stored_on_google_cloud_storage(url):
parsed_url = urlparse(url)
return self.signed_url('GET', parsed_url.path)
return url
@staticmethod
def is_stored_on_google_cloud_storage(url):
return "storage.googleapis.com" in url

GraphAPIError: (#324) Requires upload file

I'm trying to upload a picture with the python sdk:
Code:
graph = facebook.GraphAPI(self.current_user.access_token)
graph.put_object("me", "photos", name = "test", message = raw_picture_data)
But I get the error "GraphAPIError: (#324) Requires upload file". I don't think it's a permissions issue, as I've requested perms="user_photos,friends_photos,publish_stream". Does anyone know what this error means and how to resolve it?
I used this library to encode the image: http://atlee.ca/software/poster/
Add this to facebook.py:
from poster.encode import *
from poster.streaminghttp import register_openers
def put_photo(self, source, album_id=None, message=""):
object_id = album_id or "me"
register_openers()
datagen, headers = multipart_encode([('message', message), ('access_token', self.access_token), ('source', source)])
req = urllib2.Request("https://graph.facebook.com/%s/photos" % object_id, datagen, headers)
try:
data = urllib2.urlopen(req).read()
except urllib2.HTTPError as e:
data = e.read()
try:
response = _parse_json(data)
if response.get("error"):
raise GraphAPIError(response["error"].get("code", 1),response["error"]["message"])
except ValueError:
response = data
return response
Call the function with the photo as a file like object:
graph = facebook.GraphAPI(access_token)
photo = open("myphoto.bmp","rb")
graph.put_photo(photo,"me","This is my brilliant photo")
The put_photo method has been submitted by someone (I forget who) as a proposed function to add to the API, but it didn't work for me until I used poster to encode the image.
Hope this helps.
I just battled with a similar error a bit. I did not use the SDK, just a POST to the Graph API. For me, this error happened when I didn't supply a filename for the file upload field in the "form" sent to Facebook.
This is my code (poster - http://pypi.python.org/pypi/poster/0.8.1)
from poster.encode import multipart_encode, MultipartParam
url = 'https://graph.facebook.com/me/photos?access_token=%s'%model.facebook_token
file_param = MultipartParam(name = 'source',
filename = 'photo.jpg', #this is crucial!!!
fileobj = blob_reader) #the blob reader is the fileobject for the file (with read() method)
message_param = MultipartParam(name = 'message',
value = 'test message')
datagen, headers = multipart_encode([file_param,
message_param])
from google.appengine.api import urlfetch
result = urlfetch.fetch(url,
payload = ''.join(datagen),
headers = headers,
method = 'POST')
return result.content
