Google Analytics core reporting API, fetch and dump - python

I'm trying to write a google analytics connector in a lambda function using python to fetch and store all the metrics and dimensions values that the Google Core Reporting API provides. As of now, I'm able to query the individual metrics/dimensions values from the api but unsure how to dump all the data as json as it only returns values which I'm asking for.
"""Hello Analytics Reporting API V4."""
import argparse
from apiclient.discovery import build
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
# OAuth scope requested when authorizing: read-only access to Analytics.
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
CLIENT_SECRETS_PATH = 'client_secrets.json' # Path to client_secrets.json file.
# Placeholder view ID sent as "viewId" in the report request; replace with a real one.
VIEW_ID = 'xxxxxxx'
def initialize_analyticsreporting():
    """Build an authorized Analytics Reporting API v4 service object.

    Stored credentials are read from ``analyticsreporting.dat``. When they
    are missing or invalid, the oauth2client flow built from
    ``CLIENT_SECRETS_PATH`` is run and the storage object is handed to it so
    the new credentials can be written back.

    Returns:
        The ``analyticsreporting`` v4 service object returned by ``build``.
    """
    # Parse an empty argument list so the oauth2client flags keep their
    # defaults no matter what was passed on the real command line.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[tools.argparser])
    flags = parser.parse_args([])

    # Flow object used only if we need to (re-)authenticate.
    flow = client.flow_from_clientsecrets(
        CLIENT_SECRETS_PATH, scope=SCOPES,
        message=tools.message_if_missing(CLIENT_SECRETS_PATH))

    storage = file.Storage('analyticsreporting.dat')
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        # NOTE(review): run_flow presumably needs an interactive user; confirm
        # before using this in a non-interactive environment such as Lambda.
        credentials = tools.run_flow(flow, storage, flags)

    http = credentials.authorize(http=httplib2.Http())
    return build('analyticsreporting', 'v4', http=http)
def get_report(analytics, metrics=('ga:sessions',), dimensions=(),
               view_id=None):
    """Run one Reporting API v4 ``batchGet`` request and return the response.

    The original request sent an empty ``metrics`` list; the request now
    always names at least one metric and lets the caller choose which
    metrics and dimensions to fetch.

    Args:
        analytics: Service object exposing ``reports().batchGet(body=...)``.
        metrics: Iterable of metric expressions, e.g. ``'ga:sessions'``.
        dimensions: Iterable of dimension names, e.g. ``'ga:country'``.
        view_id: View to query; defaults to the module-level ``VIEW_ID``.

    Returns:
        Whatever ``execute()`` returns for the request.

    Raises:
        ValueError: If ``metrics`` is empty.
    """
    if not metrics:
        raise ValueError('at least one metric is required')

    request = {
        'viewId': VIEW_ID if view_id is None else view_id,
        'metrics': [{'expression': name} for name in metrics],
    }
    # Only send "dimensions" when some were asked for.
    if dimensions:
        request['dimensions'] = [{'name': name} for name in dimensions]

    return analytics.reports().batchGet(
        body={'reportRequests': [request]}).execute()
def print_response(response):
    """Print every dimension and metric value of a Reporting API v4 response.

    For each row of each report, prints one ``header: value`` line per
    dimension, then for each date range a ``Date range (i)`` line followed
    by one ``name: value`` line per metric.

    Args:
        response: Dict returned by ``batchGet(...).execute()``. Missing keys
            are treated as empty, so an empty dict prints nothing.
    """
    for report in response.get('reports', []):
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_headers = column_header.get(
            'metricHeader', {}).get('metricHeaderEntries', [])

        for row in report.get('data', {}).get('rows', []):
            for header, dimension in zip(dimension_headers,
                                         row.get('dimensions', [])):
                print('{}: {}'.format(header, dimension))

            for index, date_range in enumerate(row.get('metrics', [])):
                print('Date range ({})'.format(index))
                # A missing 'values' key is treated as "no metric values"
                # instead of passing None to zip.
                for metric_header, value in zip(metric_headers,
                                                date_range.get('values', [])):
                    print('{}: {}'.format(metric_header.get('name'), value))
def main():
    """Authorize, fetch the report and print it."""
    analytics = initialize_analyticsreporting()
    response = get_report(analytics)
    print_response(response)


if __name__ == '__main__':
    main()
Existing code snippet for fetching data and the current output it produces
Date range (0)
ga:visits: 6
Instead of this, I'm trying to get all the 500+ metrics that Google Analytics provides.

As of now, I'm able to query the individual metrics/dimensions values
from the api but unsure how to dump all the data as json as it only
returns values which I'm asking for.
Yes that's how the API works: you need to query for specific dimensions and metrics and you only get what you asked for.
I'm trying to get all the 500+ metrics that Google Analytics provides.
Out of the box you can't: GA API limits you to querying 7 dimensions + 10 metrics at a time (see below v3 documentation, same applies to v4):
https://developers.google.com/analytics/devguides/reporting/core/v3/reference#largeDataResults
"allowing a maximum of 7 dimensions and 10 metrics in any one API request"
The workaround is to use a custom dimension as identifier such as User ID + session ID through which you can identify uniquely each session, and thus run multiple API queries to gather more dimensions/metrics, and then re-aggregate the data based on that custom dimension.
Here is a library that explains in more details:
https://github.com/aiqui/ga-download

Related

Using Google OAuth2 with Analytics Reporting to serve user data Python

I am trying to setup a google analytics reporting tool, but would like to use the OAuth2 client to allow users to log in with a 'log in with google' button in order to view their data.
I am using the quick start guide from Analytics V4 Reporting which gives a file with the below content:
"""A simple example of how to access the Google Analytics API."""
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
def get_service(api_name, api_version, scopes, key_file_location):
    """Get a service that communicates to a Google API.

    Args:
        api_name: The name of the api to connect to.
        api_version: The api version to connect to.
        scopes: A list of auth scopes to authorize for the application.
        key_file_location: The path to a valid service account JSON key file.

    Returns:
        A service that is connected to the specified API.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        key_file_location, scopes=scopes)
    return build(api_name, api_version, credentials=credentials)
def get_first_profile_id(service):
    """Return the ID of the first view (profile) the service can list.

    Takes the first account, then that account's first web property, then
    that property's first view.

    Args:
        service: Analytics service object exposing ``management()``.

    Returns:
        The view (profile) ID, or None when any of the three lists is empty.
    """
    accounts = service.management().accounts().list().execute()
    account_items = accounts.get('items')
    if not account_items:
        return None
    account_id = account_items[0].get('id')

    properties = service.management().webproperties().list(
        accountId=account_id).execute()
    property_items = properties.get('items')
    if not property_items:
        return None
    # Named property_id so the builtin ``property`` is not shadowed.
    property_id = property_items[0].get('id')

    profiles = service.management().profiles().list(
        accountId=account_id,
        webPropertyId=property_id).execute()
    profile_items = profiles.get('items')
    if not profile_items:
        return None
    return profile_items[0].get('id')
def get_results(service, profile_id):
    """Query the Core Reporting API for sessions over the past seven days.

    Args:
        service: Analytics service object exposing ``data().ga().get(...)``.
        profile_id: View (profile) ID to query, without the ``ga:`` prefix.

    Returns:
        The result of ``execute()``, or None when ``profile_id`` is empty
        (for example when no profile was found), instead of failing on
        ``'ga:' + None``.
    """
    if not profile_id:
        return None
    return service.data().ga().get(
        ids='ga:{}'.format(profile_id),
        start_date='7daysAgo',
        end_date='today',
        metrics='ga:sessions').execute()
def print_results(results):
    """Print the view name and total sessions from a Core Reporting result.

    Uses single-argument ``print(...)`` calls so the output is the same
    under Python 2 and Python 3.

    Args:
        results: Dict returned by ``get_results``, or None.
    """
    rows = results.get('rows') if results else None
    # No result at all, or a result without rows: nothing to report.
    if not rows:
        print('No results found')
        return
    profile_name = results.get('profileInfo', {}).get('profileName')
    print('View (Profile): %s' % profile_name)
    print('Total Sessions: %s' % rows[0][0])
def main():
    """Authenticate with a service account and print last week's sessions."""
    # Define the auth scopes to request.
    scope = 'https://www.googleapis.com/auth/analytics.readonly'
    key_file_location = '<REPLACE_WITH_JSON_FILE>'

    # Authenticate and construct service.
    service = get_service(
        api_name='analytics',
        api_version='v3',
        scopes=[scope],
        key_file_location=key_file_location)

    profile_id = get_first_profile_id(service)
    if profile_id is None:
        # No account/property/view is visible to these credentials.
        print('No results found')
    else:
        print_results(get_results(service, profile_id))


if __name__ == '__main__':
    main()
This has a function that gets service account info from a hard-coded json file:
ServiceAccountCredentials.from_json_keyfile_name(key_file_location, scopes=scopes)
And then uses that info to get info using the hard-coded file.
service = get_service(
api_name='analytics',
api_version='v3',
scopes=[scope],
key_file_location=key_file_location)
This is fine if I want just my own data, but how do I go about retrieving the information (analytics data, scopes, view ID etc) using a simple authentication in python? I haven't been able to stitch the documentation for OAuth2 together for this use case.
If anyone could help me, or point me to a resource I could use that would be great.
Thanks.

Google Analytics: fetch the view ID using the API

I'm trying to build a support analytics tool which will let the end user create a dashboard of their own using data from multiple sources; one such source is Google Analytics. I used the Google Analytics Core Reporting API to fetch the data. However, as of now, I'm manually inserting the view ID of my user account to fetch the data. Since I'm building it for end users, I need to be able to fetch the view ID of a user account programmatically (using the API) when they authorize my app using OAuth. I have seen tools like Databox which have achieved this, so I'm wondering how to replicate the same. Here's the code snippet I'm using:
import argparse
from apiclient.discovery import build
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
# OAuth scope requested when authorizing: read-only access to Analytics.
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
CLIENT_SECRETS_PATH = 'client_secrets.json' # Path to client_secrets.json file.
VIEW_ID = 'xxxxxx' # manually inserted view ID here
def initialize_analyticsreporting():
    """Build an authorized Analytics Reporting API v4 service object.

    Stored credentials are read from ``analyticsreporting.dat``. When they
    are missing or invalid, the oauth2client flow built from
    ``CLIENT_SECRETS_PATH`` is run and the storage object is handed to it so
    the new credentials can be written back.

    Returns:
        The ``analyticsreporting`` v4 service object returned by ``build``.
    """
    # Parse an empty argument list so the oauth2client flags keep their
    # defaults no matter what was passed on the real command line.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[tools.argparser])
    flags = parser.parse_args([])

    # Flow object used only if we need to (re-)authenticate.
    flow = client.flow_from_clientsecrets(
        CLIENT_SECRETS_PATH, scope=SCOPES,
        message=tools.message_if_missing(CLIENT_SECRETS_PATH))

    storage = file.Storage('analyticsreporting.dat')
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = tools.run_flow(flow, storage, flags)

    http = credentials.authorize(http=httplib2.Http())
    return build('analyticsreporting', 'v4', http=http)
def get_report(analytics, view_id=None):
    """Query the Reporting API v4 for sessions over the last seven days.

    Args:
        analytics: Service object exposing ``reports().batchGet(body=...)``.
        view_id: View to query. Defaults to the module-level ``VIEW_ID``;
            pass a value here to query a view ID obtained at runtime
            instead of the hard-coded one.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    request = {
        'viewId': VIEW_ID if view_id is None else view_id,
        'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
        'metrics': [{'expression': 'ga:sessions'}],
    }
    return analytics.reports().batchGet(
        body={'reportRequests': [request]}).execute()
def print_response(response):
    """Print every dimension and metric value of a Reporting API v4 response.

    For each row of each report, prints one ``header: value`` line per
    dimension, then for each date range a ``Date range (i)`` line followed
    by one ``name: value`` line per metric.

    Args:
        response: Dict returned by ``batchGet(...).execute()``. Missing keys
            are treated as empty, so an empty dict prints nothing.
    """
    for report in response.get('reports', []):
        column_header = report.get('columnHeader', {})
        dimension_headers = column_header.get('dimensions', [])
        metric_headers = column_header.get(
            'metricHeader', {}).get('metricHeaderEntries', [])

        for row in report.get('data', {}).get('rows', []):
            for header, dimension in zip(dimension_headers,
                                         row.get('dimensions', [])):
                print('{}: {}'.format(header, dimension))

            for index, date_range in enumerate(row.get('metrics', [])):
                print('Date range ({})'.format(index))
                # A missing 'values' key is treated as "no metric values"
                # instead of passing None to zip.
                for metric_header, value in zip(metric_headers,
                                                date_range.get('values', [])):
                    print('{}: {}'.format(metric_header.get('name'), value))
def main():
    """Authorize, fetch the report and print it."""
    analytics = initialize_analyticsreporting()
    response = get_report(analytics)
    print_response(response)


if __name__ == '__main__':
    main()
I figured out the answer myself. We have to use Management API to fetch all View Id's of a particular account and pass that as a parameter to core reporting api for getting the metrics/dimensions.

Making a request to the Google Analytics API responds with Login Required

I get data from the Google Analytics API v3
https://www.googleapis.com/analytics/v3/data/ga?ids=ga:181335694&metrics=ga:sessions&start-date=7daysAgo&end-date=today
Once I run the browser is throwing an error
{
"error":{
"errors":[
{
"domain":"global",
"reason":"required",
"message":"Login Required",
"locationType":"header",
"location":"Authorization"
}
],
"code":401,
"message":"Login Required"
}
}
How to resolve this error?
When I run my Python code I can get data from the Google Analytics API, but when I open the same request URL in the browser it returns the "Login Required" error above. How do I resolve it?
There are two types of data when we are talking about Google APIs.
Public data: data that is not owned by anyone, which everyone can look at.
Private data: data that is owned by a user, which you must have permission to access.
"Login Required",
means exactly that: you must be authenticated in order to access the data you are trying to access. You need the permission of the owner of that data. You can't just take that GET string and kick it off in a browser; you need an access token in order to do that. You get an access token from the authentication flow.
Since you mentioned python you should be following the hello analytics tutorial which will show you how to set up your project and authenticate your script so that you can get access to the data you need.
import argparse
from apiclient.discovery import build
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
def get_service(api_name, api_version, scope, client_secrets_path):
    """Get a service that communicates to a Google API.

    Stored credentials are read from ``<api_name>.dat``; when they are
    missing or invalid the oauth2client flow is run and the storage object
    is handed to it so the new credentials can be written back.

    Args:
        api_name: string The name of the api to connect to.
        api_version: string The api version to connect to.
        scope: A list of strings representing the auth scopes to authorize
            for the connection.
        client_secrets_path: string A path to a valid client secrets file.

    Returns:
        A service that is connected to the specified API.
    """
    # Parse an empty argument list so the oauth2client flags keep their
    # defaults no matter what was passed on the real command line.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[tools.argparser])
    flags = parser.parse_args([])

    # Flow object used only if we need to (re-)authenticate.
    flow = client.flow_from_clientsecrets(
        client_secrets_path, scope=scope,
        message=tools.message_if_missing(client_secrets_path))

    storage = file.Storage(api_name + '.dat')
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = tools.run_flow(flow, storage, flags)

    http = credentials.authorize(http=httplib2.Http())
    return build(api_name, api_version, http=http)
def get_first_profile_id(service):
    """Return the ID of the first view (profile) of the authorized user.

    Takes the first account, then that account's first web property, then
    that property's first view.

    Args:
        service: Analytics service object exposing ``management()``.

    Returns:
        The view (profile) ID, or None when any of the three lists is empty.
    """
    accounts = service.management().accounts().list().execute()
    account_items = accounts.get('items')
    if not account_items:
        return None
    account_id = account_items[0].get('id')

    properties = service.management().webproperties().list(
        accountId=account_id).execute()
    property_items = properties.get('items')
    if not property_items:
        return None
    # Named property_id so the builtin ``property`` is not shadowed.
    property_id = property_items[0].get('id')

    profiles = service.management().profiles().list(
        accountId=account_id,
        webPropertyId=property_id).execute()
    profile_items = profiles.get('items')
    if not profile_items:
        return None
    return profile_items[0].get('id')
def get_results(service, profile_id):
    """Query the Core Reporting API for sessions in the past seven days.

    Args:
        service: Analytics service object exposing ``data().ga().get(...)``.
        profile_id: View (profile) ID to query, without the ``ga:`` prefix.

    Returns:
        The result of ``execute()``, or None when ``profile_id`` is empty
        (for example when no profile was found), instead of failing on
        ``'ga:' + None``.
    """
    if not profile_id:
        return None
    return service.data().ga().get(
        ids='ga:{}'.format(profile_id),
        start_date='7daysAgo',
        end_date='today',
        metrics='ga:sessions').execute()
def print_results(results):
    """Print the view name and total sessions from a Core Reporting result.

    Uses ``print(...)`` calls with a single pre-formatted argument so the
    block is valid and prints the same under Python 2 and Python 3.

    Args:
        results: Dict returned by ``get_results``, or None.
    """
    rows = results.get('rows') if results else None
    # No result at all, or a result without rows: nothing to report.
    if not rows:
        print('No results found')
        return
    profile_name = results.get('profileInfo', {}).get('profileName')
    print('View (Profile): %s' % profile_name)
    print('Total Sessions: %s' % rows[0][0])
def main():
    """Authenticate via the client-secrets flow and print last week's sessions."""
    # Define the auth scopes to request.
    scope = ['https://www.googleapis.com/auth/analytics.readonly']

    # Authenticate and construct service.
    service = get_service('analytics', 'v3', scope, 'client_secrets.json')

    profile = get_first_profile_id(service)
    if profile is None:
        # No account/property/view is visible to the authorized user.
        print('No results found')
    else:
        print_results(get_results(service, profile))


if __name__ == '__main__':
    main()

Getting Error with Google Analytics API 403 Forbidden

Hello all, I have been working on a Bing Analytics to Google Analytics script in Python. I recently figured out what was wrong with my Google Analytics upload script, and after fixing that I have run into a new issue. I keep getting the error "There was an API error : 403 : Forbidden". I have looked at all the 403 errors that Google has published information about and have corrected everything I thought was wrong, and I am still getting the error. If someone could help me figure this out it would be great. Below is the code that I am using for my upload script.
"""A simple example of how to access the Google Analytics API."""
import argparse
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
import csv, os, shutil, glob, datetime
from datetime import date, timedelta
from apiclient.http import MediaFileUpload
from apiclient.errors import HttpError
# Home directory of the current user. expanduser is used instead of
# os.environ["HOME"] so a missing HOME variable does not raise KeyError.
home = os.path.expanduser("~")
# Current date as MM_DD_YY; used to build the upload file name.
dt = datetime.datetime.now().strftime("%m_%d_%y")
def get_service(api_name, api_version, scope, key_file_location,
                service_account_email):
    """Get a service that communicates to a Google API.

    Args:
        api_name: The name of the api to connect to.
        api_version: The api version to connect to.
        scope: A list of auth scopes to authorize for the application.
        key_file_location: The path to a valid service account p12 key file.
        service_account_email: The service account email address.

    Returns:
        A service that is connected to the specified API.
    """
    credentials = ServiceAccountCredentials.from_p12_keyfile(
        service_account_email, key_file_location, scopes=scope)
    http = credentials.authorize(httplib2.Http())
    return build(api_name, api_version, http=http)
def get_first_profile_id(service):
    """Return the ID of the first view (profile) the service can list.

    Takes the first account, then that account's first web property, then
    that property's first view.

    Args:
        service: Analytics service object exposing ``management()``.

    Returns:
        The view (profile) ID, or None when any of the three lists is empty.
    """
    accounts = service.management().accounts().list().execute()
    account_items = accounts.get('items')
    if not account_items:
        return None
    account_id = account_items[0].get('id')

    properties = service.management().webproperties().list(
        accountId=account_id).execute()
    property_items = properties.get('items')
    if not property_items:
        return None
    # Named property_id so the builtin ``property`` is not shadowed.
    property_id = property_items[0].get('id')

    profiles = service.management().profiles().list(
        accountId=account_id,
        webPropertyId=property_id).execute()
    profile_items = profiles.get('items')
    if not profile_items:
        return None
    return profile_items[0].get('id')
def get_results(service, profile_id):
    """Query the Core Reporting API for sessions within the past seven days.

    Args:
        service: Analytics service object exposing ``data().ga().get(...)``.
        profile_id: View (profile) ID to query, without the ``ga:`` prefix.

    Returns:
        The result of ``execute()``, or None when ``profile_id`` is empty
        (for example when no profile was found), instead of failing on
        ``'ga:' + None``.
    """
    if not profile_id:
        return None
    return service.data().ga().get(
        ids='ga:{}'.format(profile_id),
        start_date='7daysAgo',
        end_date='today',
        metrics='ga:sessions').execute()
def print_results(results):
    """Print the view name and total sessions from a Core Reporting result.

    Uses ``print(...)`` calls with a single pre-formatted argument so the
    block is valid and prints the same under Python 2 and Python 3.

    Args:
        results: Dict returned by ``get_results``, or None.
    """
    rows = results.get('rows') if results else None
    # No result at all, or a result without rows: nothing to report.
    if not rows:
        print('No results found')
        return
    profile_name = results.get('profileInfo', {}).get('profileName')
    print('View (Profile): %s' % profile_name)
    print('Total Sessions: %s' % rows[0][0])
def main():
    """Print last week's sessions, then upload today's Bing Ads CSV.

    The CSV named ``Bing_Ad_Upload<MM_DD_YY>.csv`` is sent through the
    Management API ``uploads().uploadData`` call. TypeError and HttpError
    raised by the upload are reported on stdout rather than propagated.
    """
    # uploadData writes to Analytics, so the read-only scope alone is not
    # enough and leads to "403 Forbidden"; the edit scope is requested too.
    # NOTE(review): the service account presumably also needs Edit
    # permission on the target property -- confirm in the Analytics admin UI.
    scope = ['https://www.googleapis.com/auth/analytics.readonly',
             'https://www.googleapis.com/auth/analytics.edit']

    # Use the developer console and replace the values with your
    # service account email and relative location of your key file.
    # The address uses '@'; the '#' in the original text is not a valid
    # service-account email.
    service_account_email = 'uploader@bing-ads-analytics.iam.gserviceaccount.com'
    key_file_location = home + '/Desktop/Bing-Ads-Analytics-5684236fdf8e.p12'

    # Authenticate and construct service.
    service = get_service('analytics', 'v3', scope, key_file_location,
                          service_account_email)

    profile = get_first_profile_id(service)
    if profile is None:
        # No account/property/view is visible to the service account.
        print('No results found')
    else:
        print_results(get_results(service, profile))

    try:
        media = MediaFileUpload('Bing_Ad_Upload' + dt + '.csv',
                                mimetype='application/octet-stream',
                                resumable=False)
        service.management().uploads().uploadData(
            accountId='XXXXXXXX',
            webPropertyId='UA-XXXXXXXX-1',
            customDataSourceId='XXXXXXXXXXXXXXXXXXXXXX',
            media_body=media).execute()
    except TypeError as error:
        # Handle errors in constructing a query.
        print('There was an error in constructing your query : %s' % error)
    except HttpError as error:
        # Handle API errors.
        print('There was an API error : %s : %s' %
              (error.resp.status, error.resp.reason))


if __name__ == '__main__':
    main()
EDIT!!!
I have found where the error is occurring: there is something wrong with the daily_upload call. Could someone please explain, in better detail than what Google gives, where to find the accountId, webPropertyId, and customDataSourceId? Thank you.
You got accountId in:
# Get the first Google Analytics account.
account = accounts.get('items')[0].get('id')
And properties in:
# Get a list of all views (profiles) for the first property.
profiles = service.management().profiles().list(
accountId=account,
webPropertyId=property).execute()

Python with Google Analytics Querying a specific Google Account from Googles scripts

I am quite new to Python and got the following script from Google Analytics API help. I have got it working and extracting data, however, it specifies to get the first google account, I have multiple GA accounts and wish to specify just one. Any help would be great?
Thanks
Craig
"""A simple example of how to access the Google Analytics API."""
import argparse
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
def get_service(api_name, api_version, scope, key_file_location,
                service_account_email):
    """Get a service that communicates to a Google API.

    Args:
        api_name: The name of the api to connect to.
        api_version: The api version to connect to.
        scope: A list of auth scopes to authorize for the application.
        key_file_location: The path to a valid service account p12 key file.
        service_account_email: The service account email address.

    Returns:
        A service that is connected to the specified API.
    """
    credentials = ServiceAccountCredentials.from_p12_keyfile(
        service_account_email, key_file_location, scopes=scope)
    http = credentials.authorize(httplib2.Http())
    return build(api_name, api_version, http=http)
def get_first_profile_id(service):
    """Return the ID of the first view (profile) the service can list.

    Takes the first account, then that account's first web property, then
    that property's first view.

    Args:
        service: Analytics service object exposing ``management()``.

    Returns:
        The view (profile) ID, or None when any of the three lists is empty.
    """
    accounts = service.management().accounts().list().execute()
    account_items = accounts.get('items')
    if not account_items:
        return None
    account_id = account_items[0].get('id')

    properties = service.management().webproperties().list(
        accountId=account_id).execute()
    property_items = properties.get('items')
    if not property_items:
        return None
    # Named property_id so the builtin ``property`` is not shadowed.
    property_id = property_items[0].get('id')

    profiles = service.management().profiles().list(
        accountId=account_id,
        webPropertyId=property_id).execute()
    profile_items = profiles.get('items')
    if not profile_items:
        return None
    return profile_items[0].get('id')
def get_results(service, profile_id):
    """Query the Core Reporting API for sessions within the past seven days.

    Args:
        service: Analytics service object exposing ``data().ga().get(...)``.
        profile_id: View (profile) ID to query, without the ``ga:`` prefix.

    Returns:
        The result of ``execute()``, or None when ``profile_id`` is empty
        (for example when no profile was found), instead of failing on
        ``'ga:' + None``.
    """
    if not profile_id:
        return None
    return service.data().ga().get(
        ids='ga:{}'.format(profile_id),
        start_date='7daysAgo',
        end_date='today',
        metrics='ga:sessions').execute()
def print_results(results):
    """Print the view name and total sessions from a Core Reporting result.

    Uses ``print(...)`` calls with a single pre-formatted argument so the
    block is valid and prints the same under Python 2 and Python 3.

    Args:
        results: Dict returned by ``get_results``, or None.
    """
    rows = results.get('rows') if results else None
    # No result at all, or a result without rows: nothing to report.
    if not rows:
        print('No results found')
        return
    profile_name = results.get('profileInfo', {}).get('profileName')
    print('View (Profile): %s' % profile_name)
    print('Total Sessions: %s' % rows[0][0])
def main():
    """Authenticate with a service account and print last week's sessions."""
    # Define the auth scopes to request.
    scope = ['https://www.googleapis.com/auth/analytics.readonly']

    # Use the developer console and replace the values with your
    # service account email and relative location of your key file.
    service_account_email = '<Replace with your service account email address.>'
    key_file_location = '<Replace with /path/to/generated/client_secrets.p12>'

    # Authenticate and construct service.
    service = get_service('analytics', 'v3', scope, key_file_location,
                          service_account_email)

    profile = get_first_profile_id(service)
    if profile is None:
        # No account/property/view is visible to the service account.
        print('No results found')
    else:
        print_results(get_results(service, profile))


if __name__ == '__main__':
    main()
Comment out (or remove) the following line:
profile = get_first_profile_id(service)
In the next line enter the id of the profile you want to query manually as the second parameter
print_results(get_results(service, '123456789'))
To get the profile id you can either visit the Query Explorer, a nice Google tool that allows you to run ad hoc queries against your authenticated accounts (i.e. you need to be logged in with the Google Account that has access to Analytics). You can get the profile id from the "ids" field.
Or go to your Analytics account and, in the reports, look at the URL. It will look like
https://analytics.google.com/analytics/web/?authuser=0#report/defaultid/a1111110w65439246p123456789/
The profile id is at the end of the url (after the "p" character).
Check out this post it has code that adapts the Google Analytics Python sample code and you can add multiple profile ids.
# Maps a display name to the view (profile) ID for each profile to query.
profile_ids = {'My Profile 1': '1234567',
               'My Profile 2': '1234568',
               'My Profile 3': '1234569',
               'My Profile 4': '1234561'}
# Uncomment this line & replace with 'profile name': 'id' to query a single profile
# Delete or comment out this line to loop over multiple profiles.
## profile_ids = {'ryanpraski': '1234567'}
-Ryan

Categories