I am attempting to write a Google Cloud Function to set caps to disable usage above a certain limit. I followed the instructions here: https://cloud.google.com/billing/docs/how-to/notify#cap_disable_billing_to_stop_usage.
This is what my cloud function looks like (I am just copying and pasting from the Google Cloud docs page linked above):
import base64
import json
import os
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
# Project id read from the GCP_PROJECT environment variable; os.getenv
# returns None when the variable is not set.
PROJECT_ID = os.getenv('GCP_PROJECT')
# Resource name in 'projects/<id>' form; stop_billing passes it to the
# billing helpers.
PROJECT_NAME = f'projects/{PROJECT_ID}'
def stop_billing(data, context):
    """
    Disable billing for the project once the reported cost exceeds the budget.

    @param {dict} data Event payload; the budget notification is expected as
        base64-encoded JSON under the 'data' key
    @param context Event metadata (not used)
    @return {None}
    """
    # An event without a 'data' key raises KeyError on the next line.
    notification = json.loads(base64.b64decode(data['data']).decode('utf-8'))
    cost_amount = notification['costAmount']
    budget_amount = notification['budgetAmount']
    if cost_amount <= budget_amount:
        print(f'No action necessary. (Current cost: {cost_amount})')
        return

    projects = discovery.build(
        'cloudbilling',
        'v1',
        cache_discovery=False,
        credentials=GoogleCredentials.get_application_default(),
    ).projects()

    if not __is_billing_enabled(PROJECT_NAME, projects):
        print('Billing already disabled')
        return
    print(__disable_billing_for_project(PROJECT_NAME, projects))
def __is_billing_enabled(project_name, projects):
    """
    Determine whether billing is enabled for a project
    @param {string} project_name Name of project to check if billing is enabled
    @param projects Resource object exposing getBillingInfo(name=...)
    @return {bool} Whether project has billing enabled or not
    """
    res = projects.getBillingInfo(name=project_name).execute()
    # A response without 'billingEnabled' is treated as "billing is off"
    # instead of raising KeyError.
    return res.get('billingEnabled', False)
def __disable_billing_for_project(project_name, projects):
    """
    Disable billing for a project by removing its billing account
    @param {string} project_name Name of project disable billing on
    @param projects Resource object exposing updateBillingInfo(name=..., body=...)
    @return {None} The API response is printed, not returned
    """
    request = projects.updateBillingInfo(
        name=project_name,
        body={'billingAccountName': ''},  # Disable billing
    )
    res = request.execute()
    print(f'Billing disabled: {json.dumps(res)}')
Also attaching screenshot of what it looks like on Google Cloud Function UI:
I'm also attaching a screenshot to show that I copied and pasted the relevant things to the requirements.txt file as well.
But when I go to test the code, it gives me an error:
Expand all | Collapse all{
insertId: "000000-69dce50a-e079-45ed-b949-a241c97fdfe4"
labels: {…}
logName: "projects/stanford-cs-231n/logs/cloudfunctions.googleapis.com%2Fcloud-functions"
receiveTimestamp: "2020-02-06T16:24:26.800908134Z"
resource: {…}
severity: "ERROR"
textPayload: "Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 383, in run_background_function
_function_handler.invoke_user_function(event_object)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function
return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 214, in call_user_function
event_context.Context(**request_or_event.context))
File "/user_code/main.py", line 9, in stop_billing
pubsub_data = base64.b64decode(data['data']).decode('utf-8')
KeyError: 'data'
"
timestamp: "2020-02-06T16:24:25.411Z"
trace: "projects/stanford-cs-231n/traces/8e106d5ab629141d5d91b6b68fb30c82"
}
Any idea why?
Relevant Stack Overflow Post: https://stackoverflow.com/a/58673874/3507127
There seems to be an error in the code Google provided. I got it working when I changed the stop_billing function:
def stop_billing(data, context):
    """
    Disable billing for the project once the reported cost exceeds the budget.

    Accepts both event shapes: a Pub/Sub delivery, where the notification is
    base64-encoded JSON under 'data', and a direct test invocation, where
    'costAmount' and 'budgetAmount' are top-level keys of the event.

    @param {dict} data Event payload in either of the two shapes above
    @param context Event metadata (not used)
    @return {None}
    """
    if 'data' in data:
        notification = json.loads(base64.b64decode(data['data']).decode('utf-8'))
    else:
        notification = data
    cost_amount = notification['costAmount']
    budget_amount = notification['budgetAmount']

    if cost_amount <= budget_amount:
        print(f'No action necessary. (Current cost: {cost_amount})')
        return

    # Without a project id there is no billing resource to address.
    if PROJECT_ID is None:
        print('No project specified with environment variable')
        return

    billing = discovery.build('cloudbilling', 'v1', cache_discovery=False)
    projects = billing.projects()

    if __is_billing_enabled(PROJECT_NAME, projects):
        __disable_billing_for_project(PROJECT_NAME, projects)
    else:
        print('Billing already disabled')
The problem is that the pub/sub message provides input as a json message with a 'data' entry that is base64 encoded. In the testing functionality you provide the json entry without a 'data' key and without encoding it. This is checked for in the function that I rewrote above.
Related
I have recently upgraded my Azure Cognitive Search instance so it has semantic search.
However, when I add query_type=semantic, in the client search I get the following stacktrace...
Traceback (most recent call last):
File "call_semantic_search.py", line 34, in <module>
c, r = main(search_text='what is a ')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "call_semantic_search.py", line 28, in main
count: float = search_results.get_count()
^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.11/site-packages/azure/search/documents/_paging.py", line 82, in get_count
return self._first_iterator_instance().get_count()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.11/site-packages/azure/search/documents/_paging.py", line 91, in wrapper
self._response = self._get_next(self.continuation_token)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.11/site-packages/azure/search/documents/_paging.py", line 115, in _get_next_cb
return self._client.documents.search_post(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.11/site-packages/azure/search/documents/_generated/operations/_documents_operations.py", line 312, in search_post
raise HttpResponseError(response=response, model=error)
azure.core.exceptions.HttpResponseError: () The request is invalid. Details: parameters : Requested value 'semantic' was not found.
Code:
Message: The request is invalid. Details: parameters : Requested value 'semantic' was not found.
This is the code that I have been using to call the search index.
import logging
from typing import Dict, Iterable, Tuple
import settings as settings
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from search import SearchableItem
# Passed as `top` to client.search: the number of hits requested per call.
TOP = 10
# Passed as `skip` to client.search: the number of leading hits to skip.
SKIP = 0
def main(search_text: str) -> Tuple[float, Iterable[Dict]]:
    """Run a semantic query for `search_text` against the configured index.

    Returns the count reported by the search results together with the
    results converted by SearchableItem.from_result_as_dict.
    """
    client = SearchClient(
        api_version="2021-04-30-Preview",
        endpoint=settings.SEARCH_SERVICE_ENDPOINT,
        index_name=settings.SOCIAL_IDX_NAME,
        credential=AzureKeyCredential(key=settings.SEARCH_SERVICE_KEY),
    )
    logging.info(f"Calling: /search?top={TOP}&skip={SKIP}&q={search_text}")

    query_options = {
        "search_text": search_text,
        "top": TOP,
        "skip": SKIP,
        "query_type": "semantic",
        "include_total_count": True,
    }
    search_results = client.search(**query_options)

    count: float = search_results.get_count()
    return count, SearchableItem.from_result_as_dict(search_results)
if __name__ == "__main__":
    # Run one sample query and print the total plus the materialised hits.
    total, hits = main(search_text='what is a ')
    print(total, list(hits))
And here is my Azure configuration (I'm able to perform semantic searches via the portal):
EDITS
Taking @Thiago Custodio's advice;
I enabled logging with:
import sys
# Raise the 'azure' logger to DEBUG so the SDK's debug records are emitted.
logger = logging.getLogger('azure')
logger.setLevel(logging.DEBUG)
# Configure a console output: send the records to stdout.
handler = logging.StreamHandler(stream=sys.stdout)
logger.addHandler(handler)
# ...
# Same query as before, with per-call logging switched on for this debugging run.
search_results = client.search(
search_text=search_text,
top=TOP,
skip=SKIP,
query_type="semantic",
include_total_count=True,
# NOTE(review): presumably makes the SDK log the outgoing HTTP request - confirm.
logging_enable=True
)
# ...
And I got the following:
DEBUG:azure.core.pipeline.policies._universal:Request URL: 'https://search.windows.net//indexes('idx-name')/docs/search.post.search?api-version=2020-06-30'
Request method: 'POST'
Request headers:
'Content-Type': 'application/json'
'Accept': 'application/json;odata.metadata=none'
'Content-Length': '86'
'x-ms-client-request-id': 'fbaafc9e-qwww-11ed-9117-a69cwa6c72e'
'api-key': '***'
'User-Agent': 'azsdk-python-search-documents/11.3.0 Python/3.11.1 (macOS-13.0-x86_64-i386-64bit)'
So this shows the request URL going out is pinned to api-version=2020-06-30 - in the Azure Portal, if I change the search version to the same, semantic search is unavailable.
I seem to have an outdated version of the search library even though I installed via:
pip install azure-search-documents
The most notable difference is that in my local azure/search/documents/_generated/operations/_documents_operations.py - the api_version seems to be hardcoded to 2020-06-30 see:
Looking at the source, I actually need the api_version to be set dynamically, so that I can pass it in at the caller when constructing the search client. This is something that's already implemented in the main branch of the source (see: Source), but for some reason my local version is different.
from your code:
search_results = client.search(
search_text=search_text,
top=TOP,
skip=SKIP,
query_type="semantic",
include_total_count=True,
)
Semantic search is not a parameter, but an endpoint. Rather than calling /search, you should call /semantic
that's what you need:
def semantic_ranking():
    """Issue a semantic query for "luxury" against the index named in the environment.

    Endpoint, index name and API key are read from the AZURE_SEARCH_*
    environment variables. The hits are materialised into a list; nothing
    is returned.
    """
    # [START semantic_ranking]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
    index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
    api_key = os.getenv("AZURE_SEARCH_API_KEY")

    client = SearchClient(
        endpoint=endpoint,
        index_name=index_name,
        credential=AzureKeyCredential(api_key),
    )
    hits = client.search(search_text="luxury", query_type="semantic", query_language="en-us")
    results = list(hits)
note: query_type part in the last line
Fixed with:
azure-search-documents==11.4.0b3
I'm using Google's My Business API via Google's API Python Client Library.
Without further ado, here is a complete code example:
from dotenv import load_dotenv
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from os.path import exists
from pprint import pprint
import os
import pickle
# NOTE(review): presumably loads the .env file into the environment so the
# os.getenv calls below can see its values - confirm against python-dotenv.
load_dotenv()
# Each setting is None when the corresponding variable is not defined.
API_DEVELOPER_KEY = os.getenv('API_DEVELOPER_KEY')
API_SCOPE = os.getenv('API_SCOPE')
# Path of the pickle file in which obtained credentials are kept.
STORED_CLIENT_CREDENTIALS = os.getenv('STORED_CLIENT_CREDENTIALS')
# Path handed to the OAuth flow as its client-secrets file.
GOOGLE_APPLICATION_CREDENTIALS = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
def get_google_credentials(path=STORED_CLIENT_CREDENTIALS):
    '''Loads stored credentials. Gets and stores new credentials if necessary.

    path: location of the pickle file holding previously obtained credentials.
    Returns the credentials object, either unpickled from `path` or freshly
    obtained through the local-server OAuth flow and then stored.
    '''
    if exists(path):
        # NOTE: pickle.load can execute arbitrary code; only point `path` at a
        # file written by store_google_credentials, never at untrusted data.
        with open(path, 'rb') as pickle_in:
            return pickle.load(pickle_in)

    # No stored credentials yet: run the interactive flow once and keep the result.
    flow = InstalledAppFlow.from_client_secrets_file(
        client_secrets_file=GOOGLE_APPLICATION_CREDENTIALS, scopes=API_SCOPE)
    flow.run_local_server()
    credentials = flow.credentials
    store_google_credentials(credentials)
    return credentials
def store_google_credentials(credentials, path=STORED_CLIENT_CREDENTIALS):
    '''Store credentials for future reuse to avoid authenticating every time.

    credentials: the object to pickle.
    path: destination file; it is overwritten if it already exists.
    '''
    # The context manager closes the file even when pickling fails.
    with open(path, 'wb') as pickle_out:
        pickle.dump(credentials, pickle_out)
def get_google_api_interface(credentials, service_name, service_version, service_discovery_url=None):
    '''Build and return the API client resource for one Google service.

    The developer key comes from the module-level API_DEVELOPER_KEY;
    service_discovery_url is forwarded as the discovery URL (None by default).
    '''
    build_options = {
        'credentials': credentials,
        'developerKey': API_DEVELOPER_KEY,
        'discoveryServiceUrl': service_discovery_url,
    }
    return build(service_name, service_version, **build_options)
def extract_dict_key(dict, key):
    '''Return the value stored under `key` for every mapping in the iterable `dict`.'''
    values = []
    for entry in dict:
        values.append(entry[key])
    return values
def transform_list_to_string(list, separator=' '):
    '''Join the str() form of every item in `list`, placing `separator` between items.'''
    pieces = [str(item) for item in list]
    return separator.join(pieces)
def get_google_account_names():
    '''Get a list of all account names (unique ids).'''
    credentials = get_google_credentials()
    google = get_google_api_interface(
        credentials,
        service_name='mybusinessaccountmanagement',
        service_version='v1',
        service_discovery_url='https://mybusinessaccountmanagement.googleapis.com/$discovery/rest?version=v1')
    response = google.accounts().list().execute()
    # Keep only the 'name' field of each listed account.
    return [account['name'] for account in response['accounts']]
def get_google_store_reviews(account_name, location_names=None):
    '''Get all store reviews for a specific account from Google My Business.

    account_name: a single account resource name ('accounts/...').
    location_names: optional iterable of location resource names to fetch
        reviews for; it is sent as 'locationNames' in the request body.
    Returns the executed batchGetReviews response.
    '''
    google = get_google_api_interface(
        get_google_credentials(),
        service_name='mybusiness',
        service_version='v4',
        service_discovery_url='https://mybusiness.googleapis.com/$discovery/rest?version=v4')
    # Generated client methods take keyword arguments only, so the account
    # goes in as `name=`; a positional argument raises TypeError.
    # NOTE(review): with no location names the body carries an empty list;
    # confirm how the API responds to that.
    body = {'locationNames': [name for name in (location_names or [])]}
    return google.accounts().locations().batchGetReviews(
        name=account_name, body=body).execute()
# Script entry: list every account name, then fetch reviews for the first one.
account_names = get_google_account_names()
pprint(account_names)
# Indexing with [0] raises IndexError when no accounts are returned.
first_account_name = account_names[0]
pprint(get_google_store_reviews(first_account_name))
And here is the contents of .env:
API_DEVELOPER_KEY = ********
API_SCOPE = https://www.googleapis.com/auth/business.manage
STORED_CLIENT_CREDENTIALS = secrets/credentials.pickle
GOOGLE_APPLICATION_CREDENTIALS = secrets/client_secrets.json
My function get_google_account_names() works fine and returns the expected data:
['accounts/******************020',
'accounts/******************098',
'accounts/******************872',
'accounts/******************021',
'accounts/******************112']
I have tested and validated get_google_credentials() to ensure that CLIENT_CREDENTIALS and API_DEVELOPER_KEY are indeed loaded correctly and working.
Also, in .env, I'm setting the environment variable GOOGLE_APPLICATION_CREDENTIALS to the client_secret.json path, as required by some methods in Google's Python Client Library.
My function get_google_store_reviews(), however, results in this error:
Traceback (most recent call last):
File "/my-project-dir/my-script.py", line 88, in <module>
pprint(get_google_store_reviews())
File "/my-project-dir/my-script.py", line 76, in get_google_store_reviews
google = get_google_api_interface(
File "/my-project-dir/my-script.py", line 46, in get_google_api_interface
return build(service_name,
File "/my-project-dir/.venv/lib/python3.9/site-packages/googleapiclient/_helpers.py", line 131, in positional_wrapper
return wrapped(*args, **kwargs)
File "/my-project-dir/.venv/lib/python3.9/site-packages/googleapiclient/discovery.py", line 324, in build
raise UnknownApiNameOrVersion("name: %s version: %s" % (serviceName, version))
googleapiclient.errors.UnknownApiNameOrVersion: name: mybusiness version: v4
I have also tried v1 of the Discovery Document with the same result.
Does anyone know what's going on here? It seems like the API mybusiness is not discoverable via the Discovery Document provided by Google, but I'm not sure how to verify my suspicion.
Note that this and this issue are related, but not exactly the same. The answers in those questions are old and don't seem to be applicable anymore after recent changes by Google.
Update:
As a commenter pointed out, this API appears to be deprecated. That might explain the issues I'm having, however, Google's documentation states:
"Deprecated indicates that the version of the API will continue to function […]"
Furthermore, notice that even though the top-level accounts.locations is marked as deprecated, some of the underlying methods (including batchGetReviews) are not.
See screenshot for more details:
This issue has also been reported in GitHub.
The batchGetReviews method expects a single account as the path parameter.
You should thus loop over get_google_account_names() and call .batchGetReviews(google_account) instead of .batchGetReviews(google_accounts).
I'm trying to access Google Ads campaign reports from Python following this tutorial.
I've requested my Developer Token with Basic Access. I think it has enough privileges to execute the script. I can see my token is active when I go to "API Center" in Google Ads.
I've created a project in google cloud and an Oauth Token.
In google Cloud:
Created a new project
Activated the Google Ads API.
When I go to Manage-> Credentials I see that the Oauth token is compatible with that API.
I have successfully created a refresh token.
I'm using this script as proof of concept:
import os
import json
import sys
from google.ads.google_ads.errors import GoogleAdsException
# Put an account id to download stats from. Note: not MCC, no dash lines
CUSTOMER_ID = "xxxxxxxxxx"
def get_account_id(account_id, check_only=False):
    """
    Normalise an account id to a 10-digit string.

    An int is converted with str(); a str has its dashes removed. The result
    is accepted only if it is exactly 10 ASCII digits, so a negative int or a
    non-numeric string is rejected instead of being returned as an "id".

    :account_id - int or str (dashes allowed in the str form)
    :check_only - if True, returns None instead of Error
    :returns - the 10-character digit string, or None when check_only is set
    :raises ValueError - the id is not recognised and check_only is False
    """
    candidate = None
    # bool is a subclass of int; it is never a valid account id.
    if isinstance(account_id, int) and not isinstance(account_id, bool):
        candidate = str(account_id)
    elif isinstance(account_id, str):
        candidate = account_id.replace("-", "")

    if (
        candidate is not None
        and len(candidate) == 10
        and set(candidate) <= set("0123456789")
    ):
        return candidate
    if check_only:
        return None
    raise ValueError(f"Couldn't recognize account id from {account_id}")
def micros_to_currency(micros):
    """Convert an amount in micros (millionths of a unit) to currency units."""
    micros_per_unit = 1000000.0
    return micros / micros_per_unit
def main(client, customer_id):
"""
Print the top keyword rows of the last 7 days for one customer account.

:client - object providing get_service() and get_type()
:customer_id - account id passed to search_stream
On GoogleAdsException the request id, status and each error are printed
and the process exits with status 1.
"""
ga_service = client.get_service("GoogleAdsService")# , version="v5")
# Query text: keyword_view rows of enabled SEARCH ad groups, most impressions first.
query = """
SELECT
campaign.id,
campaign.name,
ad_group.id,
ad_group.name,
ad_group_criterion.criterion_id,
ad_group_criterion.keyword.text,
ad_group_criterion.keyword.match_type,
metrics.impressions,
metrics.clicks,
metrics.cost_micros
FROM keyword_view
WHERE
segments.date DURING LAST_7_DAYS
AND campaign.advertising_channel_type = 'SEARCH'
AND ad_group.status = 'ENABLED'
AND ad_group_criterion.status IN ('ENABLED', 'PAUSED')
ORDER BY metrics.impressions DESC
LIMIT 50"""
# Issues a search request using streaming.
response = ga_service.search_stream(customer_id, query) #THIS LINE GENERATES THE ERROR
# Enum type used below to turn a numeric match type into its name.
keyword_match_type_enum = client.get_type(
"KeywordMatchTypeEnum"
).KeywordMatchType
try:
# The stream is consumed batch by batch; each batch exposes its rows as .results.
for batch in response:
for row in batch.results:
campaign = row.campaign
ad_group = row.ad_group
criterion = row.ad_group_criterion
metrics = row.metrics
keyword_match_type = keyword_match_type_enum.Name(
criterion.keyword.match_type
)
print(
f'Keyword text "{criterion.keyword.text}" with '
f'match type "{keyword_match_type}" '
f"and ID {criterion.criterion_id} in "
f'ad group "{ad_group.name}" '
f'with ID "{ad_group.id}" '
f'in campaign "{campaign.name}" '
f"with ID {campaign.id} "
f"had {metrics.impressions} impression(s), "
f"{metrics.clicks} click(s), and "
f"{metrics.cost_micros} cost (in micros) during "
"the last 7 days."
)
except GoogleAdsException as ex:
# Report the failed request, then every individual error it carries.
print(
f'Request with ID "{ex.request_id}" failed with status '
f'"{ex.error.code().name}" and includes the following errors:'
)
for error in ex.failure.errors:
print(f'\tError with message "{error.message}".')
if error.location:
for field_path_element in error.location.field_path_elements:
print(f"\t\tOn field: {field_path_element.field_name}")
# NOTE(review): indentation was lost in this listing; presumably this exit
# belongs to the except branch, after all errors are printed - confirm.
sys.exit(1)
if __name__ == "__main__":
    # credentials dictionary
    creds = {"google_ads": "googleads.yaml"}
    if not os.path.isfile(creds["google_ads"]):
        # The file is absent, so FileNotFoundError is the matching error;
        # FileExistsError would signal the opposite condition.
        raise FileNotFoundError("File googleads.yaml doesn't exists. ")
    resources = {"config": "config.json"}

    # This logging allows to see additional information on debugging
    import logging
    logging.basicConfig(level=logging.INFO, format='[%(asctime)s - %(levelname)s] %(message).5000s')
    logging.getLogger('google.ads.google_ads.client').setLevel(logging.DEBUG)

    # Initialize the google_ads client
    from google.ads.google_ads.client import GoogleAdsClient
    gads_client = GoogleAdsClient.load_from_storage(creds["google_ads"])

    # Normalise the configured id before running the report.
    id_to_load = get_account_id(CUSTOMER_ID)
    main(gads_client, id_to_load)
I've changed CUSTOMER_ID to the account number that appears on the upper left corner
I've created a googleads.yaml and I've loaded the aforementioned information.
When I execute the script I get this error:
Traceback (most recent call last):
File "download_keywords_from_account.py", line 138, in <module>
main(gads_client, id_to_load)
File "download_keywords_from_account.py", line 70, in main
response = ga_service.search_stream(customer_id, query)
File "google/ads/google_ads/v6/services/google_ads_service_client.py", line 366, in search_stream
return self._inner_api_calls['search_stream'](request, retry=retry, timeout=timeout, metadata=metadata)
File google/api_core/gapic_v1/method.py", line 145, in __call__
return wrapped_func(*args, **kwargs)
File "google/api_core/retry.py", line 281, in retry_wrapped_func
return retry_target(
File "google/api_core/retry.py", line 184, in retry_target
return target()
File "google/api_core/timeout.py", line 214, in func_with_timeout
return func(*args, **kwargs)
File "google/api_core/grpc_helpers.py", line 152, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.PermissionDenied: 403 Request had insufficient authentication scopes
The googleads.yaml file looks like this:
#############################################################################
# Required Fields #
#############################################################################
developer_token: {developer token as seen in google ads -> tools -> api center}
#############################################################################
# Optional Fields #
#############################################################################
login_customer_id: {Id from the top left corner in google ads, only numbers}
# user_agent: INSERT_USER_AGENT_HERE
# partial_failure: True
validate_only: False
#############################################################################
# OAuth2 Configuration #
# Below you may provide credentials for either the installed application or #
# service account flows. Remove or comment the lines for the flow you're #
# not using. #
#############################################################################
# The following values configure the client for the installed application
# flow.
client_id: {Oauth client id taken from gcloud -> api -> credentials} ends with apps.googleusercontent.com
client_secret: {got it while generating the token}
refresh_token: 1//0hr.... made with generate_refresh_token.py
# The following values configure the client for the service account flow.
path_to_private_key_file: ads.json
# delegated_account: INSERT_DOMAIN_WIDE_DELEGATION_ACCOUNT
#############################################################################
# ReportDownloader Headers #
# Below you may specify boolean values for optional headers that will be #
# applied to all requests made by the ReportDownloader utility by default. #
#############################################################################
# report_downloader_headers:
# skip_report_header: False
# skip_column_header: False
# skip_report_summary: False
# use_raw_enum_values: False
NOTES:
The file ads.json contains the private key downloaded from the credentials page in gcloud.
I've seen some posts on this issue but none of them are Python + GoogleADs and I couldn't find a solution there either.
I have also tried other Python + GoogleAds examples getting the same error. This makes me
think that I must be configuring something wrong in gcloud / google ads. But I don't understand what.
Please help me make the query I'm really stuck.
Thanks a lot!
Comments of @DazWilkin solved my problem. Thanks!
I'm trying to use Raspi 3B+ and AutoML Vision to train a model for classification. However, when I try to create a dataset on Google Cloud Platform, it runs into a problem as follows:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.7/site-packages/google/api core/grpc helpers.py", line 57, in error remapped callable
return callable (*args, **kwargs)
File "/home/pi/.local/lib/python3.7/site-packages/grpc/ channel.py", line 826, in call __
return end_unary_response blocking({state, call, False, None)
File "/home/pi/.local/lib/python3.7/site-packages/grpc/ channel.py", line 729, in end unary response blocking
raise InactiveRpcError(state)
grpc. channel. InactiveRpcError: < InactiveRpcError of RPC that terminated with:
status = StatusCode. INVALID ARGUMENT
details = "List of found errors: 1.Field: parent; Message: Required field is invalid "
debug error string = "{"created":"G1604833054.567218256", "description":"Error received from peer ipv6: [2a00:1450:400a: 801: :200a] :443","file":"src/core/lib/surface/call.cc","file line":1056,"grpc_message":"List of found
errors:\tl.Field: parent; Nessage: Required field is invalid\t","grpc_ status":3}"
>
The creating-dataset code is
# Build the AutoML client and the location path used when creating the dataset.
automl_client = automl.AutoMlClient()
# NOTE(review): the reported INVALID_ARGUMENT names the `parent` field, and this
# value is what create_dataset receives first - confirm project_id and region_name.
project_location = automl_client.location_path(project_id, region_name)
bucket = storage_client.bucket(bucket_name)
# upload the images to google cloud bucket
upload_image_excel(bucket, bucket_name, dataset_name, status_list, csv_name)
# Create a new automl dataset programatically
classification_type = 'MULTICLASS'
dataset_metadata = {'classification_type': classification_type}
dataset_config = {
'display_name': dataset_name,
'image_classification_dataset_metadata': dataset_metadata
}
dataset = automl_client.create_dataset(project_location, dataset_config)
# The dataset id is the last path segment of the returned resource name.
dataset_id = dataset.name.split('/')[-1]
dataset_full_id = automl_client.dataset_path(
project_id, region_name, dataset_id
)
# Read the *.csv file on Google Cloud
remote_csv_path = 'gs://{0}/{1}'.format(bucket_name, csv_name)
# split(',') wraps the single path in a list (more entries if it contains commas).
input_uris = remote_csv_path.split(',')
input_config = {'gcs_source': {'input_uris': input_uris}}
response = automl_client.import_data(dataset_full_id, input_config)
Does anyone know what's happening here?
Which region are you using? Be aware that for this feature, currently project resources must be in the us-central1 region to use this API [1].
The error returned is INVALID ARGUMENT, so I do not think the region restriction mentioned above is the issue. Looking at the GCP documentation on creating a dataset [1], I see your code differs from what is done in that sample: the metadata and the configuration are set in a different way. Could you please try to recreate it using the same format as in the sample shared? I believe this should resolve the issue you are experiencing.
Here you have a code example:
from google.cloud import automl
# Sample: create an image-classification dataset and print its name and id.
# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# display_name = "your_datasets_display_name"
client = automl.AutoMlClient()
# A resource that represents Google Cloud Platform location.
project_location = f"projects/{project_id}/locations/us-central1"
# Specify the classification type
# Types:
# MultiLabel: Multiple labels are allowed for one example.
# MultiClass: At most one label is allowed per example.
# https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#classificationtype
metadata = automl.ImageClassificationDatasetMetadata(
classification_type=automl.ClassificationType.MULTILABEL
)
dataset = automl.Dataset(
display_name=display_name,
image_classification_dataset_metadata=metadata,
)
# Create a dataset with the dataset metadata in the region.
response = client.create_dataset(parent=project_location, dataset=dataset)
# The created dataset is obtained from the response through .result().
created_dataset = response.result()
# Display the dataset information
print("Dataset name: {}".format(created_dataset.name))
# The id is the last path segment of the dataset's resource name.
print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))
[1] https://cloud.google.com/vision/automl/docs/create-datasets#automl_vision_classification_create_dataset-python
I am currently looking for a way to upload a video to Azure Media Services (AMS v3) via Python SDKs. I have followed its instruction, and am able to connect to AMS successfully.
Example
# Wrap the ADAL client-credentials token call so the media client can authenticate with it.
credentials = AdalAuthentication(
context.acquire_token_with_client_credentials,
RESOURCE,
CLIENT,
KEY)
client = AzureMediaServices(credentials, SUBSCRIPTION_ID) # Successful
I also successfully get all the videos' details uploaded via its portal
# Print name and description of each listed asset.
# NOTE(review): .get(0) presumably selects the first page of results - confirm.
for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0):
print(f'Asset_name: {data.name}, file_name: {data.description}')
# Asset_name: 4f904060-d15c-4880-8c5a-xxxxxxxx, file_name: 夢想全紀錄.mp4
# Asset_name: 8f2e5e36-d043-4182-9634-xxxxxxxx, file_name: an552Qb_460svvp9.webm
# Asset_name: aef495c1-a3dd-49bb-8e3e-xxxxxxxx, file_name: world_war_2.webm
# Asset_name: b53d8152-6ecd-41a2-a59e-xxxxxxxx, file_name: an552Qb_460svvp9.webm - Media Encoder Standard encoded
However, when I tried to use the following method, it failed, since I have no idea what to pass as parameters - Link to Python SDKs
create_or_update(resource_group_name, account_name, asset_name,
parameters, custom_headers=None, raw=False, **operation_config)
Therefore, I would like to ask questions as follows (everything is done via Python SDKs):
What kind of parameters does it expect?
Can a video be uploaded directly to AMS or it should be uploaded to Blob Storage first?
Should an Asset contain only one video or multiple files are fine?
The documentation for the REST version of that method is at https://learn.microsoft.com/en-us/rest/api/media/assets/createorupdate. This is effectively the same as the Python parameters.
Videos are stored in Azure Storage for Media Services. This is true for input assets, the assets that are encoded, and any streamed content. It all is in Storage but accessed by Media Services. You do need to create an asset in Media Services which creates the Storage container. Once the Storage container exists you upload via the Storage APIs to that Media Services created container.
Technically multiple files are fine, but there are a number of issues with doing that that you may not expect. I'd recommend using 1 input video = 1 Media Services asset. On the encoding output side there will be more than one file in the asset. Encoding output contains one or more videos, manifests, and metadata files.
I have found my method to work around using Python SDKs and REST; however, I am not quite sure it's proper.
Log-In to Azure Media Services and Blob Storage via Python packages
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.mgmt.media.models import MediaService
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
Create Assets for an original file and an encoded one by passing these parameters. Example of the original file Asset creation:
# Name under which the asset is created.
asset_name = 'asset-myvideo'
# Payload passed to create_or_update as its parameters argument.
asset_properties = {
'properties': {
'description': 'Original File Description',
'storageAccountName': "storage-account-name"
}
}
client.assets.create_or_update(RESOUCE_GROUP_NAME, ACCOUNT_NAME, asset_name, asset_properties)
Upload a video to the Blob Storage derived from the created original asset
# Find the asset named asset_name in the listing and take its container;
# the trailing [0] raises IndexError when no listed asset has that name.
current_container = [data.container for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0) if data.name == asset_name][0] # Get Blob Storage location
# Blob name the upload is stored under inside that container.
file_name = "myvideo.mp4"
# NOTE(review): blob_service_client is not defined in this excerpt - presumably a BlobServiceClient; confirm.
blob_client = blob_service_client.get_blob_client(container=current_container, blob=file_name)
with open('original_video.mp4', 'rb') as data:
blob_client.upload_blob(data)
print(f'Video uploaded to {current_container}')
And after that, I do Transform, Job, and Streaming Locator to get the video Streaming Link successfully.
I was able to get this to work with the newer python SDK. The python documentation is mostly missing, so I constructed this mainly from the python SDK source code and the C# examples.
azure-storage-blob==12.3.1
azure-mgmt-media==2.1.0
azure-mgmt-resource==9.0.0
adal~=1.2.2
msrestazure~=0.6.3
0) Import a lot of stuff
from azure.mgmt.media.models import (
    Asset, Transform, Job,
    BuiltInStandardEncoderPreset, TransformOutput,
    JobInputAsset, JobOutputAsset, AssetContainerSas, AssetContainerPermission,
    StreamingLocator, StreamingEndpoint,
)
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.storage.blob import BlobServiceClient, ContainerClient
import datetime as dt
import time
LOGIN_ENDPOINT = AZURE_PUBLIC_CLOUD.endpoints.active_directory
RESOURCE = AZURE_PUBLIC_CLOUD.endpoints.active_directory_resource_id
# AzureSettings is a custom NamedTuple
1) Log in to AMS:
def get_ams_client(settings: AzureSettings) -> AzureMediaServices:
    """Return an AzureMediaServices client authenticated with client credentials.

    The tenant, client id, secret and subscription id are read from `settings`.
    """
    authority_url = LOGIN_ENDPOINT + '/' + settings.AZURE_MEDIA_TENANT_ID
    auth_context = adal.AuthenticationContext(authority_url)
    credentials = AdalAuthentication(
        auth_context.acquire_token_with_client_credentials,
        RESOURCE,
        settings.AZURE_MEDIA_CLIENT_ID,
        settings.AZURE_MEDIA_SECRET,
    )
    return AzureMediaServices(credentials, settings.AZURE_SUBSCRIPTION_ID)
2) Create an input and output asset
# Asset that receives the uploaded source file.
input_asset = create_or_update_asset(
    input_asset_name, "My Input Asset", client, azure_settings)
# Asset that receives the encoder output. It gets its own name so it does not
# overwrite the input asset.
output_asset = create_or_update_asset(
    output_asset_name, "My Output Asset", client, azure_settings)
3) Get the Container Name. (Most documentation refers to BlockBlobService, which seems to have been removed from the SDK.)
def get_container_name(client: AzureMediaServices, asset_name: str, settings: AzureSettings):
    """Return the name of the storage container that backs `asset_name`.

    A read/write container SAS expiring four hours from now is requested,
    and the container name is taken from the first SAS URL returned.
    """
    expiry_time = dt.datetime.now(dt.timezone.utc) + dt.timedelta(hours=4)
    sas_listing: AssetContainerSas = client.assets.list_container_sas(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        asset_name=asset_name,
        permissions=AssetContainerPermission.read_write,
        expiry_time=expiry_time,
    )
    first_sas_url: str = sas_listing.asset_container_sas_urls[0]
    return ContainerClient.from_container_url(first_sas_url).container_name
4) Upload a file to the input asset container:
def upload_file_to_asset_container(
        container: str, local_file, uploaded_file_name, settings: AzureSettings):
    """Upload `local_file` into `container` as a blob named `uploaded_file_name`.

    The storage account is addressed through the connection string in
    settings.AZURE_MEDIA_STORAGE_CONNECTION_STRING.
    """
    blob_service_client = BlobServiceClient.from_connection_string(
        settings.AZURE_MEDIA_STORAGE_CONNECTION_STRING)
    blob_client = blob_service_client.get_blob_client(
        container=container, blob=uploaded_file_name)
    # Binary mode: the file is handed to the upload call as an open stream.
    with open(local_file, 'rb') as data:
        blob_client.upload_blob(data)
5) Create a transform (in my case, using the adaptive streaming preset):
def get_or_create_transform(
        client: AzureMediaServices,
        transform_name: str,
        settings: AzureSettings):
    """Create or update the transform `transform_name` and return it.

    The transform has a single output using the built-in
    "AdaptiveStreaming" encoder preset.
    """
    preset = BuiltInStandardEncoderPreset(preset_name="AdaptiveStreaming")
    outputs = [TransformOutput(preset=preset)]
    return client.transforms.create_or_update(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        transform_name=transform_name,
        outputs=outputs,
    )
5) Submit the Job
def submit_job(
        client: AzureMediaServices,
        settings: AzureSettings,
        input_asset: Asset,
        output_asset: Asset,
        transform_name: str,
        correlation_data: dict) -> Job:
    """Create a job that runs `transform_name` from `input_asset` into `output_asset`.

    The job name is built from the module-level UNIQUENESS value;
    `correlation_data` is attached to the job. Returns the created job.
    """
    job_spec = Job(
        input=JobInputAsset(asset_name=input_asset.name),
        outputs=[JobOutputAsset(asset_name=output_asset.name)],
        correlation_data=correlation_data,
    )
    return client.jobs.create(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        job_name=f"test_job_{UNIQUENESS}",
        transform_name=transform_name,
        parameters=job_spec,
    )
6) Then I get the URLs after the Event Grid has told me the job is done:
# side-effect warning: this starts the streaming endpoint $$$
def get_urls(client: AzureMediaServices, output_asset_name: str,
             locator_name: str):
    """Return the streaming URLs for `output_asset_name`.

    Creates the streaming locator `locator_name` (a failure is reported and
    ignored), starts the "default" streaming endpoint if it is not running,
    and builds one https URL per streaming path from the endpoint host name
    and the path's first entry.

    NOTE: `settings` is not a parameter here; it is read from the enclosing
    (module) scope and must be defined there.
    """
    try:
        client.streaming_locators.create(
            resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
            account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
            streaming_locator_name=locator_name,
            parameters=StreamingLocator(
                asset_name=output_asset_name,
                streaming_policy_name="Predefined_ClearStreamingOnly"
            )
        )
    except Exception:
        # Best effort, as in the original: any failure is taken to mean the
        # locator already exists.
        print("ignoring existing")

    streaming_endpoint: StreamingEndpoint = client.streaming_endpoints.get(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_endpoint_name="default")
    if streaming_endpoint and streaming_endpoint.resource_state != "Running":
        client.streaming_endpoints.start(
            resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
            account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
            streaming_endpoint_name="default"
        )

    paths = client.streaming_locators.list_paths(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_locator_name=locator_name
    )
    return [f"https://{streaming_endpoint.host_name}{path.paths[0]}" for path in paths.streaming_paths]