Google speech service - not returning transcription - python

I am using the sample code provided here and have implemented it as follows:
# [START import_libraries]
import argparse
import base64
import json
import time

from oauth2client.service_account import ServiceAccountCredentials
import googleapiclient.discovery
# [END import_libraries]


# [START authenticating]
# Application default credentials provided by env variable
# GOOGLE_APPLICATION_CREDENTIALS
def get_speech_service(credentials):
    return googleapiclient.discovery.build('speech', 'v1beta1', credentials=credentials)
# [END authenticating]


def main(speech_file):
    """Transcribe the given audio file asynchronously.

    Args:
        speech_file: the name of the audio file.
    """
    # [START construct_request]
    with open(speech_file, 'rb') as speech:
        # Base64 encode the binary audio file for inclusion in the request.
        speech_content = base64.b64encode(speech.read())

    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        '/Users/user/Documents/google_cloud/myjson.json', scopes)
    service = get_speech_service(credentials)
    service_request = service.speech().asyncrecognize(
        body={
            'config': {
                # There are a bunch of config options you can specify. See
                # https://cloud.google.com/speech/reference/rest/v1beta1/RecognitionConfig for the full list.
                'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
                'sampleRate': 16000,  # 16 kHz
                # See http://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                'languageCode': 'en-US',  # a BCP-47 language tag
            },
            'audio': {
                'content': speech_content.decode('UTF-8')
            }
        })
    # [END construct_request]

    # [START send_request]
    response = service_request.execute()
    print(json.dumps(response))
    # [END send_request]

    name = response['name']
    # Construct a GetOperation request.
    service_request = service.operations().get(name=name)

    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        response = service_request.execute()
        if 'done' in response and response['done']:
            break

    # First print the raw json response
    print(json.dumps(response['response'], indent=2))

    # Now print the actual transcriptions
    out = []
    for result in response['response'].get('results', []):
        print('Result:')
        for alternative in result['alternatives']:
            print(u'  Alternative: {}'.format(alternative['transcript']))
        out.append(result)
    return response


r = main("/Users/user/Downloads/brooklyn.flac")
Yet my printed output is the following:
{"name": "3202776140236290963"}
Waiting for server processing...
Waiting for server processing...
{
"#type": "type.googleapis.com/google.cloud.speech.v1beta1.AsyncRecognizeResponse"
}
And my returned object is:
{u'done': True,
 u'metadata': {u'@type': u'type.googleapis.com/google.cloud.speech.v1beta1.AsyncRecognizeMetadata',
               u'lastUpdateTime': u'2017-03-25T15:54:46.136925Z',
               u'progressPercent': 100,
               u'startTime': u'2017-03-25T15:54:44.514614Z'},
 u'name': u'2024312474309214820',
 u'response': {u'@type': u'type.googleapis.com/google.cloud.speech.v1beta1.AsyncRecognizeResponse'}}
On my console screen I can see the requests coming through. I am unsure why I am not getting the proper transcription back from the sample file.
Any input is appreciated!

Well, your config options have the following:
'config': {
    # There are a bunch of config options you can specify. See
    # https://cloud.google.com/speech/reference/rest/v1beta1/RecognitionConfig for the full list.
    'encoding': 'LINEAR16',  # raw 16-bit signed LE samples
    'sampleRate': 16000,  # 16 kHz
    # See http://g.co/cloud/speech/docs/languages for a list of
    # supported languages.
    'languageCode': 'en-US',  # a BCP-47 language tag
},
However, you are using a FLAC file:
r = main("/Users/user/Downloads/brooklyn.flac")
Quoting https://cloud.google.com/speech/reference/rest/v1beta1/RecognitionConfig:
LINEAR16
Uncompressed 16-bit signed little-endian samples (Linear PCM). This is the only encoding that may be used by speech.asyncrecognize.
FLAC
This is the recommended encoding for speech.syncrecognize and StreamingRecognize because it uses lossless compression; therefore recognition accuracy is not compromised by a lossy codec.
In other words, you can't use FLAC with speech.asyncrecognize; you need to transcode your sample to Linear PCM first, or use speech.syncrecognize with the FLAC encoding option.
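For reference, a minimal sketch of the second option, switching the question's request over to speech.syncrecognize with the FLAC encoding (the sampleRate of 16000 is an assumption; it must match the actual rate of the file):
# Sketch only: same service and speech_content as in the question,
# but using the synchronous endpoint, which accepts FLAC in v1beta1.
service_request = service.speech().syncrecognize(
    body={
        'config': {
            'encoding': 'FLAC',    # matches the .flac input file
            'sampleRate': 16000,   # assumption: must match the file's real rate
            'languageCode': 'en-US',
        },
        'audio': {
            'content': speech_content.decode('UTF-8')
        }
    })
# syncrecognize returns the results directly; no operation polling needed.
response = service_request.execute()
print(json.dumps(response, indent=2))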

Related

RecognitionConfig error with .flac files in google transcribe

I am trying to transcribe an audio file with Google Cloud. Here is my code:
from google.cloud.speech_v1 import enums
from google.cloud import speech_v1p1beta1
import os
import io


def sample_long_running_recognize(local_file_path):
    client = speech_v1p1beta1.SpeechClient()

    # local_file_path = 'resources/commercial_mono.wav'

    # If enabled, each word in the first alternative of each result will be
    # tagged with a speaker tag to identify the speaker.
    enable_speaker_diarization = True

    # Optional. Specifies the estimated number of speakers in the conversation.
    diarization_speaker_count = 2

    # The language of the supplied audio
    language_code = "en-US"
    config = {
        "enable_speaker_diarization": enable_speaker_diarization,
        "diarization_speaker_count": diarization_speaker_count,
        "language_code": language_code,
        "encoding": enums.RecognitionConfig.AudioEncoding.FLAC
    }
    with io.open(local_file_path, "rb") as f:
        content = f.read()
    audio = {"content": content}
    # audio = {"uri": storage_uri}

    operation = client.long_running_recognize(config, audio)

    print(u"Waiting for operation to complete...")
    response = operation.result()

    for result in response.results:
        # First alternative has words tagged with speakers
        alternative = result.alternatives[0]
        print(u"Transcript: {}".format(alternative.transcript))
        # Print the speaker_tag of each word
        for word in alternative.words:
            print(u"Word: {}".format(word.word))
            print(u"Speaker tag: {}".format(word.speaker_tag))


sample_long_running_recognize('/Users/asi/Downloads/trimmed_3.flac')
I keep getting this error:
google.api_core.exceptions.InvalidArgument: 400 audio_channel_count `1` in RecognitionConfig must either be unspecified or match the value in the FLAC header `2`.
I cannot figure out what I am doing wrong. I have pretty much copy-and-pasted this from the Google Cloud Speech API docs. Any advice?
This attribute (audio_channel_count) is the number of channels in the input audio data, and you only need to set it for multi-channel recognition. I would assume that this is your case, so, as the message suggests, you need to set 'audio_channel_count': 2 in your config to exactly match your audio file.
Please take a look at the source code for more information about the attributes of the RecognitionConfig object.
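A minimal sketch of the corrected config from the question, adding the channel count reported in the error message (the value 2 comes from the FLAC header mentioned in the error):
config = {
    "enable_speaker_diarization": enable_speaker_diarization,
    "diarization_speaker_count": diarization_speaker_count,
    "language_code": language_code,
    "encoding": enums.RecognitionConfig.AudioEncoding.FLAC,
    # Must match the channel count in the FLAC header (2, per the error message).
    "audio_channel_count": 2,
}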

Uploading a Video to Azure Media Services with Python SDKs

I am currently looking for a way to upload a video to Azure Media Services (AMS v3) via the Python SDKs. I have followed its instructions and am able to connect to AMS successfully.
Example
credentials = AdalAuthentication(
    context.acquire_token_with_client_credentials,
    RESOURCE,
    CLIENT,
    KEY)

client = AzureMediaServices(credentials, SUBSCRIPTION_ID)  # Successful
I can also successfully get the details of all the videos uploaded via its portal:
for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0):
    print(f'Asset_name: {data.name}, file_name: {data.description}')

# Asset_name: 4f904060-d15c-4880-8c5a-xxxxxxxx, file_name: 夢想全紀錄.mp4
# Asset_name: 8f2e5e36-d043-4182-9634-xxxxxxxx, file_name: an552Qb_460svvp9.webm
# Asset_name: aef495c1-a3dd-49bb-8e3e-xxxxxxxx, file_name: world_war_2.webm
# Asset_name: b53d8152-6ecd-41a2-a59e-xxxxxxxx, file_name: an552Qb_460svvp9.webm - Media Encoder Standard encoded
However, when I tried to use the following method, it failed, since I have no idea what to pass as parameters (link to the Python SDKs):
create_or_update(resource_group_name, account_name, asset_name,
                 parameters, custom_headers=None, raw=False, **operation_config)
Therefore, I would like to ask questions as follows (everything is done via Python SDKs):
What kind of parameters does it expect?
Can a video be uploaded directly to AMS or it should be uploaded to Blob Storage first?
Should an Asset contain only one video or multiple files are fine?
The documentation for the REST version of that method is at https://learn.microsoft.com/en-us/rest/api/media/assets/createorupdate. This is effectively the same as the Python parameters.
Videos are stored in Azure Storage for Media Services. This is true for input assets, the assets that are encoded, and any streamed content. It is all in Storage but accessed through Media Services. You do need to create an asset in Media Services, which creates the Storage container. Once the Storage container exists, you upload via the Storage APIs to that Media Services-created container.
Technically multiple files are fine, but there are a number of issues with doing that that you may not expect. I'd recommend using 1 input video = 1 Media Services asset. On the encoding output side there will be more than one file in the asset. Encoding output contains one or more videos, manifests, and metadata files.
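For instance, a minimal sketch of that call with the Python SDK (the resource names below are placeholders; with azure-mgmt-media, parameters can be an Asset model or the equivalent dict):
from azure.mgmt.media.models import Asset

# Creating the asset also creates the backing Storage container.
asset = client.assets.create_or_update(
    resource_group_name='my-resource-group',  # placeholder
    account_name='my-media-account',          # placeholder
    asset_name='my-asset',                    # placeholder
    parameters=Asset(description='My input video')
)
print(asset.container)  # the Storage container to upload the video into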
I have found a way to work around this using the Python SDKs and REST; however, I am not quite sure it is the proper method.
Log-In to Azure Media Services and Blob Storage via Python packages
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.mgmt.media.models import MediaService
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
Create Assets for an original file and an encoded one by passing these parameters. Example of the original-file Asset creation:
asset_name = 'asset-myvideo'
asset_properties = {
    'properties': {
        'description': 'Original File Description',
        'storageAccountName': "storage-account-name"
    }
}
client.assets.create_or_update(RESOUCE_GROUP_NAME, ACCOUNT_NAME, asset_name, asset_properties)
Upload a video to the Blob Storage container derived from the created original asset:
# Get the Blob Storage location from the created asset
current_container = [data.container for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0)
                     if data.name == asset_name][0]
file_name = "myvideo.mp4"
blob_client = blob_service_client.get_blob_client(container=current_container, blob=file_name)
with open('original_video.mp4', 'rb') as data:
    blob_client.upload_blob(data)
print(f'Video uploaded to {current_container}')
And after that, I create the Transform, Job, and Streaming Locator to get the video streaming link successfully.
I was able to get this to work with the newer Python SDK. The Python documentation is mostly missing, so I constructed this mainly from the Python SDK source code and the C# examples.
azure-storage-blob==12.3.1
azure-mgmt-media==2.1.0
azure-mgmt-resource==9.0.0
adal~=1.2.2
msrestazure~=0.6.3
0) Import a lot of stuff
from azure.mgmt.media.models import Asset, Transform, Job, \
    BuiltInStandardEncoderPreset, TransformOutput, \
    JobInputAsset, JobOutputAsset, AssetContainerSas, AssetContainerPermission, \
    StreamingLocator, StreamingEndpoint
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.storage.blob import BlobServiceClient, ContainerClient
import datetime as dt
import time

LOGIN_ENDPOINT = AZURE_PUBLIC_CLOUD.endpoints.active_directory
RESOURCE = AZURE_PUBLIC_CLOUD.endpoints.active_directory_resource_id

# AzureSettings is a custom NamedTuple
1) Log in to AMS:
def get_ams_client(settings: AzureSettings) -> AzureMediaServices:
    context = adal.AuthenticationContext(LOGIN_ENDPOINT + '/' +
                                         settings.AZURE_MEDIA_TENANT_ID)
    credentials = AdalAuthentication(
        context.acquire_token_with_client_credentials,
        RESOURCE,
        settings.AZURE_MEDIA_CLIENT_ID,
        settings.AZURE_MEDIA_SECRET
    )
    return AzureMediaServices(credentials, settings.AZURE_SUBSCRIPTION_ID)
2) Create an input and output asset
input_asset = create_or_update_asset(
    input_asset_name, "My Input Asset", client, azure_settings)
output_asset = create_or_update_asset(
    output_asset_name, "My Output Asset", client, azure_settings)
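The create_or_update_asset helper is not shown here; a minimal sketch of what it could look like, reusing the settings object from step 1:
def create_or_update_asset(asset_name: str, description: str,
                           client: AzureMediaServices,
                           settings: AzureSettings) -> Asset:
    # Hypothetical helper: create (or update) an asset with a description.
    return client.assets.create_or_update(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        asset_name=asset_name,
        parameters=Asset(description=description)
    )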
3) Get the container name. (Most documentation refers to BlockBlobService, which seems to have been removed from the SDK.)
def get_container_name(client: AzureMediaServices, asset_name: str, settings: AzureSettings):
    expiry_time = dt.datetime.now(dt.timezone.utc) + dt.timedelta(hours=4)
    container_list: AssetContainerSas = client.assets.list_container_sas(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        asset_name=asset_name,
        permissions=AssetContainerPermission.read_write,
        expiry_time=expiry_time
    )
    sas_uri: str = container_list.asset_container_sas_urls[0]
    container_client: ContainerClient = ContainerClient.from_container_url(sas_uri)
    return container_client.container_name
4) Upload a file to the input asset container:
def upload_file_to_asset_container(
        container: str, local_file, uploaded_file_name, settings: AzureSettings):
    blob_service_client = BlobServiceClient.from_connection_string(
        settings.AZURE_MEDIA_STORAGE_CONNECTION_STRING)
    blob_client = blob_service_client.get_blob_client(container=container, blob=uploaded_file_name)
    with open(local_file, 'rb') as data:
        blob_client.upload_blob(data)
5) Create a transform (in my case, using the adaptive streaming preset):
def get_or_create_transform(
        client: AzureMediaServices,
        transform_name: str,
        settings: AzureSettings):
    transform_output = TransformOutput(preset=BuiltInStandardEncoderPreset(preset_name="AdaptiveStreaming"))
    transform: Transform = client.transforms.create_or_update(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        transform_name=transform_name,
        outputs=[transform_output]
    )
    return transform
6) Submit the job:
def submit_job(
        client: AzureMediaServices,
        settings: AzureSettings,
        input_asset: Asset,
        output_asset: Asset,
        transform_name: str,
        correlation_data: dict) -> Job:
    job_input = JobInputAsset(asset_name=input_asset.name)
    job_outputs = [JobOutputAsset(asset_name=output_asset.name)]
    job: Job = client.jobs.create(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        job_name=f"test_job_{UNIQUENESS}",  # UNIQUENESS: a unique suffix defined elsewhere
        transform_name=transform_name,
        parameters=Job(input=job_input,
                       outputs=job_outputs,
                       correlation_data=correlation_data)
    )
    return job
7) Then I get the URLs after Event Grid has told me the job is done:
# side-effect warning: this starts the streaming endpoint $$$
def get_urls(client: AzureMediaServices, output_asset_name: str,
             locator_name: str, settings: AzureSettings):
    try:
        locator: StreamingLocator = client.streaming_locators.create(
            resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
            account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
            streaming_locator_name=locator_name,
            parameters=StreamingLocator(
                asset_name=output_asset_name,
                streaming_policy_name="Predefined_ClearStreamingOnly"
            )
        )
    except Exception as ex:
        print("ignoring existing")

    streaming_endpoint: StreamingEndpoint = client.streaming_endpoints.get(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_endpoint_name="default")

    if streaming_endpoint:
        if streaming_endpoint.resource_state != "Running":
            client.streaming_endpoints.start(
                resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
                account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
                streaming_endpoint_name="default"
            )

    paths = client.streaming_locators.list_paths(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_locator_name=locator_name
    )
    return [f"https://{streaming_endpoint.host_name}{path.paths[0]}" for path in paths.streaming_paths]

Python: Downloading captions from YouTube

Hello, I tried to download captions from a YouTube video with the YouTube Data API.
I customized the example code provided by YouTube:
#!/usr/bin/python

# Usage example:
# python captions.py --videoid='<video_id>' --name='<name>' --file='<file>' --language='<language>' --action='action'

import httplib2
import os
import sys

from apiclient.discovery import build_from_document
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import argparser, run_flow


# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the {{ Google Cloud Console }} at
# {{ https://cloud.google.com/console }}.
# Please ensure that you have enabled the YouTube Data API for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secrets.json"

# This OAuth 2.0 access scope allows for full read/write access to the
# authenticated user's account and requires requests to use an SSL connection.
YOUTUBE_READ_WRITE_SSL_SCOPE = "https://www.googleapis.com/auth/youtube.force-ssl"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0

To make this sample run you will need to populate the client_secrets.json file
found at:
   %s
with information from the APIs Console
https://console.developers.google.com

For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
                                   CLIENT_SECRETS_FILE))


# Authorize the request and store authorization credentials.
def get_authenticated_service(args):
    flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=YOUTUBE_READ_WRITE_SSL_SCOPE,
                                   message=MISSING_CLIENT_SECRETS_MESSAGE)
    storage = Storage("%s-oauth2.json" % sys.argv[0])
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = run_flow(flow, storage, args)
    # Trusted testers can download this discovery document from the developers page
    # and it should be in the same directory with the code.
    with open("youtube-v3-api-captions.json", "r", encoding="UTF-8", newline="") as f:
        doc = f.read()
    return build_from_document(doc, http=credentials.authorize(httplib2.Http()))
# Call the API's captions.list method to list the existing caption tracks.
def list_captions(youtube, video_id):
    results = youtube.captions().list(
        part="snippet",
        videoId=video_id
    ).execute()

    for item in results["items"]:
        id = item["id"]
        name = item["snippet"]["name"]
        language = item["snippet"]["language"]
        print("Caption track '%s(%s)' in '%s' language." % (name, id, language))

    return results["items"]
# Call the API's captions.insert method to upload a caption track in draft status.
def upload_caption(youtube, video_id, language, name, file):
    insert_result = youtube.captions().insert(
        part="snippet",
        body=dict(
            snippet=dict(
                videoId=video_id,
                language=language,
                name=name,
                isDraft=True
            )
        ),
        media_body=file
    ).execute()

    id = insert_result["id"]
    name = insert_result["snippet"]["name"]
    language = insert_result["snippet"]["language"]
    status = insert_result["snippet"]["status"]
    print("Uploaded caption track '%s(%s) in '%s' language, '%s' status." % (
        name, id, language, status))
# Call the API's captions.update method to update an existing caption track's draft status
# and publish it. If a new binary file is present, update the track with the file as well.
def update_caption(youtube, caption_id, file):
    update_result = youtube.captions().update(
        part="snippet",
        body=dict(
            id=caption_id,
            snippet=dict(
                isDraft=False
            )
        ),
        media_body=file
    ).execute()

    name = update_result["snippet"]["name"]
    isDraft = update_result["snippet"]["isDraft"]
    print("Updated caption track '%s' draft status to be: '%s'" % (name, isDraft))
    if file:
        print("and updated the track with the new uploaded file.")
# Call the API's captions.download method to download an existing caption track.
def download_caption(youtube, caption_id, tfmt):
    subtitle = youtube.captions().download(
        id=caption_id,
        tfmt=tfmt
    ).execute()

    print("First line of caption track: %s" % subtitle)
# Call the API's captions.delete method to delete an existing caption track.
def delete_caption(youtube, caption_id):
    youtube.captions().delete(
        id=caption_id
    ).execute()

    print("Caption track '%s' deleted successfully." % caption_id)
if __name__ == "__main__":
    # The "videoid" option specifies the YouTube video ID that uniquely
    # identifies the video for which the caption track will be uploaded.
    argparser.add_argument("--videoid",
                           help="Required; ID for video for which the caption track will be uploaded.")
    # The "name" option specifies the name of the caption track to be used.
    argparser.add_argument("--name", help="Caption track name", default="YouTube for Developers")
    # The "file" option specifies the binary file to be uploaded as a caption track.
    argparser.add_argument("--file", help="Captions track file to upload")
    # The "language" option specifies the language of the caption track to be uploaded.
    argparser.add_argument("--language", help="Caption track language", default="en")
    # The "captionid" option specifies the ID of the caption track to be processed.
    argparser.add_argument("--captionid", help="Required; ID of the caption track to be processed")
    # The "action" option specifies the action to be processed.
    argparser.add_argument("--action", help="Action", default="all")

    args = argparser.parse_args()

    if args.action in ('upload', 'list', 'all'):
        if not args.videoid:
            exit("Please specify videoid using the --videoid= parameter.")

    if args.action in ('update', 'download', 'delete'):
        if not args.captionid:
            exit("Please specify captionid using the --captionid= parameter.")

    if args.action in ('upload', 'all'):
        if not args.file:
            exit("Please specify a caption track file using the --file= parameter.")
        if not os.path.exists(args.file):
            exit("Please specify a valid file using the --file= parameter.")

    youtube = get_authenticated_service(args)
    try:
        if args.action == 'upload':
            upload_caption(youtube, args.videoid, args.language, args.name, args.file)
        elif args.action == 'list':
            list_captions(youtube, args.videoid)
        elif args.action == 'update':
            update_caption(youtube, args.captionid, args.file)
        elif args.action == 'download':
            download_caption(youtube, args.captionid, 'srt')
        elif args.action == 'delete':
            delete_caption(youtube, args.captionid)
        else:
            # All the available methods are used in sequence just for the sake of an example.
            upload_caption(youtube, args.videoid, args.language, args.name, args.file)
            captions = list_captions(youtube, args.videoid)
            if captions:
                first_caption_id = captions[0]['id']
                update_caption(youtube, first_caption_id, None)
                download_caption(youtube, first_caption_id, 'srt')
                delete_caption(youtube, first_caption_id)
    except Exception as e:
        print(e)
    else:
        print("Created and managed caption tracks.")
If I run the command
python captions.py --videoid='00RxteR1oGQ' --language='en' --action='download'
The result is:
HttpError 404 when requesting https://www.googleapis.com/youtube/v3/captions?part=snippet&alt=json returned "The video identified by the videoId parameter could not be found."
But the video ID I typed does exist.
Many thanks in advance!
# download the package with: pip install pytube
from pytube import YouTube

source = YouTube('https://www.youtube.com/watch?v=wjTn_EkgQRg&index=1&list=PLgJ7b1NurjD2oN5ZXbKbPjuI04d_S0V1K')

en_caption = source.captions.get_by_language_code('en')
en_caption_convert_to_srt = en_caption.generate_srt_captions()
print(en_caption_convert_to_srt)

# save the caption to a file named Output.txt
text_file = open("Output.txt", "w")
text_file.write(en_caption_convert_to_srt)
text_file.close()
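Note that get_by_language_code has been removed in newer pytube releases, where the captions collection is dict-like; a sketch of the equivalent, assuming a recent pytube version:
# In recent pytube versions, index the CaptionQuery by language code instead.
# 'en' is a manually created English track; auto-generated tracks use codes like 'a.en'.
en_caption = source.captions['en']
print(en_caption.generate_srt_captions())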
You have to install pytube and import the YouTube class from it:
pip install pytube
Import it and specify the link you need to extract captions from:
from pytube import YouTube
link = YouTube('any video link')
You can get a list of the available captions and their codes using the following code:
#looking for the available captions
av_captions = link.captions
print(av_captions)
Now extract the captions, encode them in XML format, and save them to a flat file using the following code:
# caption codes look something like ['en', 'ar', 'fr']
caption = link.captions.get_by_language_code('One of the available caption codes')
xml_caption = caption.xml_captions  # encode in XML format

# saving the captions to a flat file
with open("output.txt", "w", encoding="utf-8") as f:
    f.write(xml_caption)

Add a Loop to an existing YouTube API Python script

As confirmed with YouTube Support, unfortunately the way the YouTube API is set up, you can't pull both Country and Device Type as dimensions at the same time; the only work-around is to pull a Device Type report and add one country at a time in the filter.
Therefore, you need to fire an API call for each country, which can be accomplished with a proper loop, iterating the API call over every possible country.
I managed to script the code for the API itself, but I need help with the loop over all possible countries (whether it's through an API call getting the full country list or simply by referencing a CSV file with the country list).
FYI, it is not possible to use device type as a filter for a country report.
Can anyone please give me a hand in doing that? Below please find my Python code.
https://1drv.ms/u/s!AlgTM2giFod43mzV1dQARcvsB81o
I was able to answer my own question; this is the revised code in case anyone is interested:
https://1drv.ms/u/s!AlgTM2giFod43m3plTrfSRHOfaCz
#!/usr/bin/python

from datetime import datetime, timedelta
import httplib2
import os
import sys
import csv

from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
import argparse
from oauth2client.tools import argparser, run_flow


# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the Google Developers Console at
# https://console.developers.google.com/.
# Please ensure that you have enabled the YouTube Data and YouTube Analytics
# APIs for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
# https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secretXYZ"

# These OAuth 2.0 access scopes allow for read-only access to the authenticated
# user's account for both YouTube Data API resources and YouTube Analytics Data.
YOUTUBE_SCOPES = ["https://www.googleapis.com/auth/youtube.readonly",
                  "https://www.googleapis.com/auth/yt-analytics.readonly"]
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
YOUTUBE_ANALYTICS_API_SERVICE_NAME = "youtubeAnalytics"
YOUTUBE_ANALYTICS_API_VERSION = "v1"

# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0

To make this sample run you will need to populate the client_secrets.json file
found at:
   %s
with information from the Developers Console
https://console.developers.google.com/

For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
                                   CLIENT_SECRETS_FILE))


def get_authenticated_services(args):
    flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE,
                                   scope=" ".join(YOUTUBE_SCOPES),
                                   message=MISSING_CLIENT_SECRETS_MESSAGE)

    storage = Storage("%s-oauth2.json" % sys.argv[0])
    credentials = storage.get()

    if credentials is None or credentials.invalid:
        credentials = run_flow(flow, storage, args)

    http = credentials.authorize(httplib2.Http())

    youtube_analytics = build(YOUTUBE_ANALYTICS_API_SERVICE_NAME,
                              YOUTUBE_ANALYTICS_API_VERSION, http=http)

    return youtube_analytics


def run_analytics_report(youtube_analytics, options, count):
    # Call the Analytics API to retrieve a report. For a list of available
    # reports, see:
    # https://developers.google.com/youtube/analytics/v1/channel_reports
    analytics_query_response = youtube_analytics.reports().query(
        ids="channel==%s" % options.channel_id,
        metrics=options.metrics,
        dimensions=options.dimensions,
        filters=options.filters,
        start_date=options.start_date,
        end_date=options.end_date,
        # max_results=options.max_results,
        sort=options.sort
    ).execute()

    print "Analytics Data for Channel %s" % options.channel_id

    # On the first pass, write the CSV header; on later passes, append rows.
    if count == 0:
        with open('results.csv', 'w') as csv_out:
            csvWriter = csv.writer(csv_out, delimiter=',', lineterminator='\n')
            headers = [ch["name"] for ch in analytics_query_response.get("columnHeaders", [])]
            headers.append("country")
            csvWriter.writerow(headers)
    else:
        with open('results.csv', 'a') as csv_out:
            csvWriter = csv.writer(csv_out, delimiter=',', lineterminator='\n')
            for row in analytics_query_response.get("rows", []):
                values = []
                for value in row:
                    values.append(str(value))
                # Characters 9 and 10 of the "country==XX" filter string are the country code.
                values.append(options.filters[9] + "" + options.filters[10])
                csvWriter.writerow(values)

    print "Results exported to csv"

    '''
    for column_header in analytics_query_response.get("columnHeaders", []):
        print "%-20s" % column_header["name"],
    print
    for row in analytics_query_response.get("rows", []):
        for value in row:
            print "%-20s" % value,
        print
    '''


if __name__ == "__main__":
    count = 0
    now = datetime.now()
    one_day_ago = (now - timedelta(days=1)).strftime("%Y-%m-%d")
    one_week_ago = (now - timedelta(days=7)).strftime("%Y-%m-%d")
    f = open('countries.csv', 'rb')
    reader = csv.reader(f)
    # Fire one API call per country listed in countries.csv.
    for row in reader:
        # Build a fresh parser each iteration so the --filters default
        # picks up the current country code.
        argparser = argparse.ArgumentParser()
        argparser.add_argument("--channel-id", help="Channel ID",
                               default="UCJ5v_MCY6GNUBTO8-D3XoAg")
        argparser.add_argument("--metrics", help="Report metrics",
                               default="views,estimatedMinutesWatched")
        argparser.add_argument("--dimensions", help="Report dimensions",
                               default="deviceType")
        argparser.add_argument("--filters", help="Report filters",
                               default="country==" + ''.join(row))
        argparser.add_argument("--start-date", default=one_week_ago,
                               help="Start date, in YYYY-MM-DD format")
        argparser.add_argument("--end-date", default=one_day_ago,
                               help="End date, in YYYY-MM-DD format")
        # argparser.add_argument("--max-results", help="Max results", default=10)
        argparser.add_argument("--sort", help="Sort order", default="-views")
        args = argparser.parse_args()

        youtube_analytics = get_authenticated_services(args)
        try:
            run_analytics_report(youtube_analytics, args, count)
            count = count + 1
        except HttpError, e:
            print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)

Zabbix Discovery Rule by custom script: need any advices

I'm trying to make a discovery rule to add a file-size monitor. But when I add my Template to a Host, Zabbix tells me:
Value should be a JSON object
Zabbix Agent (daemon) v2.2.10 (revision 54806) (10 August 2015)
Zabbix server v2.2.9 (revision 52686) (12 March 2015)
I've written this Python script:
import os
import sys
import json

logdir = sys.argv[1]
data = []

for (logdir, _, files) in os.walk(logdir):
    for f in files:
        if f.endswith(".log"):
            path = os.path.join(logdir, f)
            data.append({'#LOGFILEPATH': path})

jsondata = json.dumps(data)
print jsondata
It works fine and outputs the following:
[{"#LOGFILEPATH": "/opt/logs/projects/cms/cms.log"}, {"#LOGFILEPATH": "/opt/logs/projects/books/nginx.log"}]
I've checked it with jsonlint.com: valid JSON.
UserParameter in conf.d:
UserParameter = discovery.logfile.path, python /opt/scripts/zabbix/find.logfile.path.and.size.py /opt/logs/
I have attached screenshots showing my discovery configuration.
The zabbix user has permission to the directories with the script and the logs.
You have to make the array the value of a "data" key:
print json.dumps({"data": data})
so it produces...
{ "data": [{"#LOGFILEPATH": "/opt/logs/projects/cms/cms.log"}, {"#LOGFILEPATH": "/opt/logs/projects/books/nginx.log"}] }
And the macro {#LOGFILEPATH} should be in brackets: {}.
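Putting both fixes together, a corrected sketch of the question's script (same traversal; only the macro name and the top-level "data" key change):
import os
import sys
import json

logdir = sys.argv[1]
data = []

for (dirpath, _, files) in os.walk(logdir):
    for f in files:
        if f.endswith(".log"):
            path = os.path.join(dirpath, f)
            # LLD macro names must be wrapped in braces.
            data.append({'{#LOGFILEPATH}': path})

# Zabbix 2.x low-level discovery expects a top-level object with a "data" key.
print json.dumps({"data": data})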
