I have a Google Cloud Function that needs to connect to a URL, fetch data in the form of CSV files, and store them in a bucket. The Python code is below.
When I test the function it deploys successfully, but it does not work at all. When I checked the log, it shows the error below:
favt_LnT_acn_blackline_data_pull_func43jttmffma0g Invalid constructor input for AccessSecretVersionRequest: 'projects/gcp-favt-acn-rpt-dev/secrets/blackline_api_key/versions/latest'
Please review the code and suggest a fix.
Thanks,
Vithal
import base64
import logging
import requests
#import pandas as pd
#from pandas import json_normalize
import json
import os
import datetime
from datetime import datetime as dt
import pytz
from google.cloud import storage
from google.cloud import secretmanager


def delete_and_upload_blob(landing_bucket_name,
                           source_file_name,
                           landing_blob_name,
                           retention_bucket_name,
                           file_retention_flag,
                           retn_file_suffix,
                           rpt_last_run_file):
    storage_client = storage.Client()
    bucket = storage_client.bucket(landing_bucket_name)
    blob = bucket.blob(landing_blob_name)
    rpt_last_run_blob = bucket.blob('some.csv')
    retention_bucket = storage_client.bucket(retention_bucket_name)

    if blob.exists(storage_client):
        # Delete the old file
        blob.delete()
        print('File {} is deleted from Cloud Storage before Upload'.format(landing_blob_name))
    else:
        print('No Such File Exists in Storage Bucket to Delete. So, proceeding with Upload')

    # Upload new one
    blob.upload_from_filename(source_file_name)
    print("File {} uploaded to Bucket {} With Name {}.".format(source_file_name, bucket, landing_blob_name))

    if file_retention_flag == 'Y':
        # Copy the last file of the day to retention bucket
        new_file_name = retn_file_suffix + '_' + landing_blob_name
        blob_copy = bucket.copy_blob(blob, retention_bucket, new_file_name)
        print('File {} is copied to Retention Bucket {}'.format(new_file_name, retention_bucket))

    if rpt_last_run_blob.exists(storage_client):
        # Delete the old file
        rpt_last_run_blob.delete()
        print('File {} is deleted from Cloud Storage before Upload'.format(rpt_last_run_blob))
    else:
        print('No Such File Exists in Storage Bucket to Delete. So, proceeding with Upload')

    # Upload new one
    rpt_last_run_blob.upload_from_filename(rpt_last_run_file)
    print("File {} uploaded to Bucket {} With Name {}.".format(rpt_last_run_file, bucket, 'Reports_Latest_Run_time.csv'))


def api_request():
    et = pytz.timezone("US/Eastern")
    current_et_time = dt.now().astimezone(et)
    print('Current ET Time:', current_et_time)
    pt = pytz.timezone("US/Pacific")
    ut = pytz.timezone("UTC")

    blackline_base_url = "https://....com"
    blackline_sts_url = blackline_base_url + "/authorize/connect/token"

    project_id = 'gcp-favt-acn-dev'
    secret_id = '###_api_key'
    secret_client = secretmanager.SecretManagerServiceClient()
    secret_name = secret_client.secret_version_path(project_id, secret_id, 'latest')
    secret_resp = secret_client.access_secret_version(secret_name)
    api_key = secret_resp.payload.data.decode('UTF-8')

    grant_type = 'password'
    scope = '####'
    username = '####'
    payload = ('grant_type=' + grant_type + '&scope=' + scope +
               '&username=' + username + '&password=' + api_key)
    sts_headers = {'Authorization': 'Basic dXBzOk5KXXx2VENsSiEtRw==',
                   'Content-Type': 'application/x-www-form-urlencoded',
                   'Cookie': 'BLSIAPPEN=!bpJj4AOTHPcaqipWtDI6FrozN629M9xYLA/sbM1DWVH+jjuY5fgHVMACha2rIapXRoB7CcqnlaHgBw=='}
    response = requests.request("POST", blackline_sts_url, headers=sts_headers, data=payload)

    if response.ok:
        sts_response = response.json()
        access_token = sts_response['access_token']
        print(access_token)

        blackline_rpt_submit_url = blackline_base_url + '/api/queryruns'
        rpt_payload = ''
        blackline_rpt_api_headers = {'Authorization': 'Bearer {}'.format(access_token),
                                     'Content-Type': 'text/plain'}
        rpt_resp = requests.request("GET", blackline_rpt_submit_url,
                                    headers=blackline_rpt_api_headers, data=rpt_payload)
        print(rpt_resp.text)
        jl = json.loads(rpt_resp.text)

        reports_list = []
        rprts_filename = "tmp_rprts.csv"
        rprts_full_path = os.path.join("/tmp", rprts_filename)
        with open(rprts_full_path, 'w') as f:
            f.write('ReportName,ReportLastRunTime' + '\n')

        hrs = -2
        hrs_to_subtract = datetime.timedelta(hours=hrs)
        two_hrs_ago_time = current_et_time + hrs_to_subtract
        #print(two_hrs_ago_time)#latest_rpt_check_time)
        frmtd_curr_time = two_hrs_ago_time.strftime('%Y-%m-%d %H:%M:%S')
        latest_rpt_check_time = dt.strptime(frmtd_curr_time, '%Y-%m-%d %H:%M:%S')
        print("Latest Report Check Time:", latest_rpt_check_time)

        for each in jl:
            strpd_time = dt.strptime(each['endTime'][0:19], '%Y-%m-%dT%H:%M:%S')
            #print(strpd_time)
            pt_localize = pt.localize(strpd_time)
            #print(pt_localize)
            et_time = pt_localize.astimezone(et)
            #print(et_time)
            frmtd_et_time = et_time.strftime('%Y-%m-%d %H:%M:%S')
            #print(frmtd_et_time)
            cnvrted_endTime = dt.strptime(frmtd_et_time, '%Y-%m-%d %H:%M:%S')
            #print("Report LastRun EndTime:", cnvrted_endTime)
            ut_time = pt_localize.astimezone(ut)
            frmtd_ut_time = ut_time.strftime('%Y-%m-%d %H:%M:%S')
            if cnvrted_endTime > latest_rpt_check_time:
                reports_list.append({each['name']: each['exportUrls'][0]["url"]})
                rpt_last_run = each['name'] + ',' + frmtd_ut_time
                print(rpt_last_run)
                with open(rprts_full_path, 'a') as f:
                    f.write(rpt_last_run + '\n')
                retn_file_suffix = each['endTime'][0:10]
                #print(retn_file_suffix)
                rpt_run_hr = cnvrted_endTime.hour
                #print(rpt_run_hr)

        #############
        print(reports_list)
        for report in reports_list:
            for k in report:
                print(report[k])
                report_fetch_url = blackline_base_url + '/' + report[k]
                print('Report Fetch URL: {}'.format(report_fetch_url))
                filename = "temp_file.csv"
                full_path = os.path.join("/tmp", filename)
                rpt_data = requests.request("GET", report_fetch_url,
                                            headers=blackline_rpt_api_headers)
                print(rpt_data.text)
                with open(full_path, 'wb') as tmp_file:
                    tmp_file.write(rpt_data.content)

                # Upload it to Cloud Storage
                landing_bucket_name = "####_dev_landing_bkt"  # CHANGE ME
                source_file_name = os.path.join(full_path)
                rpt_last_run_file = os.path.join(rprts_full_path)
                landing_blob_name = '##.csv'  # CHANGE ME
                retention_bucket_name = '####_dev_retention_bkt'
                print('file retention check')
                if rpt_run_hr >= 22:
                    file_retention_flag = 'Y'
                else:
                    file_retention_flag = 'N'
                print(file_retention_flag)
                delete_and_upload_blob(landing_bucket_name,
                                       source_file_name,
                                       landing_blob_name,
                                       retention_bucket_name,
                                       file_retention_flag,
                                       retn_file_suffix,
                                       rpt_last_run_file)
                # Remove the temp file after it is uploaded to Cloud Storage to
                # avoid OOM issues with the Cloud Function.
                os.remove(full_path)

        # Remove the tmp file after upload
        os.remove(rprts_full_path)


#def pacific_to_eastern_conversion(pacific_time, eastern_time):
def main(event, context):
    try:
        if 'data' in event:
            name = base64.b64decode(event['data']).decode('utf-8')
        else:
            name = 'World'
        print('Hello {}'.format(name))
        api_request()
    except Exception as e:
        logging.error(e)
The approach you are using will work for Cloud Run but won't work for Cloud Functions.
To make use of secrets in Google Cloud Functions, the steps are as follows:
Make sure the function's runtime service account has been granted access to the secret. To use Secret Manager with Cloud Functions, assign the roles/secretmanager.secretAccessor role to the service account associated with your function.
Make the secret accessible to the function. This can be done using either the Google Cloud Console or the gcloud command-line tool.
I exposed the secret as an environment variable (with the name set to "api_key") and accessed it in the code as shown below:
import os
api_key = os.environ.get('api_key')
I hope this answers your question.
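For example, the Secret Manager client block in api_request() above could then be replaced with a lookup of that environment variable (a minimal sketch, assuming the secret was exposed to the function as an environment variable named api_key):
import os

# Assumes the secret value is exposed to the function as the env var "api_key"
api_key = os.environ.get('api_key')
if api_key is None:
    raise RuntimeError('api_key environment variable is not set')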
Your Cloud Functions service account doesn't have access to Secret Manager. Grant your Cloud Functions service account access on the secret, or on the project (not recommended).
If you don't set a custom service account on your Cloud Functions (which is also not a good practice), the App Engine default service account is used. It follows the pattern <ProjectID>@appspot.gserviceaccount.com.
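Also note that the quoted error ("Invalid constructor input for AccessSecretVersionRequest") is typically what the 2.x google-cloud-secret-manager client raises when the version name is passed as a bare positional string; that client expects a request dict or keyword arguments. A minimal sketch of the call form only (placeholders kept from the question, not a complete fix):
from google.cloud import secretmanager

client = secretmanager.SecretManagerServiceClient()
name = client.secret_version_path('gcp-favt-acn-dev', '###_api_key', 'latest')
# Pass the resource name as a keyword argument (or as request={"name": name}),
# not as a bare positional string.
resp = client.access_secret_version(name=name)
api_key = resp.payload.data.decode('UTF-8')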
I am new to Python and want to upload an image from the Pixabay API (or another source) to WordPress using the REST API and Python.
When I use this:
url_image = "https://pixabay.com/api/?key={API_KEY}&q={keyword}.jpg"
it returns this message:
{"code":"rest_upload_unknown_error","message":"Sorry, you are not allowed to upload this file type.","data":{"status":500}}
import base64, requests
from tempfile import NamedTemporaryFile

# keyword = input('Enter Your name')
keyword = 'flower'

def header(user, password):
    credentials = user + ':' + password
    token = base64.b64encode(credentials.encode())
    header_json = {'Authorization': 'Basic ' + token.decode('utf-8'),
                   'Content-Disposition': 'attachment; filename=%s' % "test1.jpg"}
    return header_json

def upload_image_to_wordpress(file_path, header_json):
    media = {'file': file_path, 'caption': f'{keyword}'}
    responce = requests.post("https://yourwebsite.com/wp-json/wp/v2/media", headers=header_json, files=media)
    print(responce.text)

heder = header("username", "password")  # username, application password

url_image = "https://pixabay.com/api/?key={API_KEY}&q={keyword}.jpg"
# url = "https://cdn.pixabay.com/photo/2021/11/30/08/24/strawberries-6834750_1280.jpg"
raw = requests.get(f'{url_image}').content
with NamedTemporaryFile(delete=False, mode="wb", suffix=".jpg") as img:
    img.write(raw)
    # print(f.file())

c = open(img.name, "rb")
upload_image_to_wordpress(c, heder)
I've built a successful service connection to the Drive API already, and I’m creating export URLs to download each sheet in a Spreadsheet as a CSV file by sending requests with Google’s AuthorizedSession class. For some reason, only a portion of the CSV files come back correct, with the others containing broken HTML. When I send a single request, the sheet always comes back correct, but when I loop through the sheets and start sending requests things start to break. I've identified there's a problem with how I'm passing the credentials this way, but I'm not sure if I'm using AuthorizedSession correctly. Can anyone help me figure this one out?
from googleapiclient.discovery import build
from google.oauth2 import service_account
from google.auth.transport.requests import AuthorizedSession
import re
import shutil
import urllib.parse

CLIENT_SECRET_FILE = "client_secret.json"
API_NAME = "sheets"
API_VERSION = "v4"
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
SPREADSHEET_ID = "Spreadsheet ID goes here"

print(CLIENT_SECRET_FILE, API_NAME, API_VERSION, SCOPES, sep="-")

cred = service_account.Credentials.from_service_account_file(
    CLIENT_SECRET_FILE, scopes=SCOPES
)

try:
    service = build(API_NAME, API_VERSION, credentials=cred)
    print(API_NAME, "service created successfully")

    result = service.spreadsheets().get(spreadsheetId=SPREADSHEET_ID).execute()
    export_url = re.sub(r"/edit$", "/export", result["spreadsheetUrl"])

    authed_session = AuthorizedSession(cred)

    for sheet in result["sheets"]:
        sheet_name = sheet["properties"]["title"]
        params = {"format": "csv", "gid": sheet["properties"]["sheetId"]}
        query_params = urllib.parse.urlencode(params)
        url = export_url + "?" + query_params
        response = authed_session.get(url)
        file_path = "./Downloads/" + sheet_name + ".csv"
        with open(file_path, "wb") as csv_file:
            csv_file.write(response.content)
        print("Downloaded sheet: " + sheet_name)
    print("Downloads complete")
except Exception as e:
    print("Unable to connect")
    print(e)
This code should get you a sheets service:
"""Hello sheets."""
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
KEY_FILE_LOCATION = '<REPLACE_WITH_JSON_FILE>'
VIEW_ID = '<REPLACE_WITH_VIEW_ID>'

def initialize_sheet():
    """Initializes a sheetservice object.

    Returns:
        An authorized sheetservice object.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, SCOPES)

    # Build the service object.
    sheet = build('sheets', 'v4', credentials=credentials)

    return sheet

If you use the same sheets service built by this method, then you shouldn't have any issues looping.
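A minimal usage sketch of the function above (assuming SPREADSHEET_ID from the question is available in scope; not part of the original answer):
sheets_service = initialize_sheet()
result = sheets_service.spreadsheets().get(spreadsheetId=SPREADSHEET_ID).execute()
for sheet in result["sheets"]:
    # Reuse the single service object for every call in the loop
    print(sheet["properties"]["title"])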
I think your script's authed_session = AuthorizedSession(cred) and response = authed_session.get(url) are correct. In your situation, though, the number of requests sent in a short time might be large, and this might be the reason for your issue. So, as a simple modification, how about the following change?
From:
for sheet in result["sheets"]:
    sheet_name = sheet["properties"]["title"]
    params = {"format": "csv", "gid": sheet["properties"]["sheetId"]}
    query_params = urllib.parse.urlencode(params)
    url = export_url + "?" + query_params
    response = authed_session.get(url)
    file_path = "./Downloads/" + sheet_name + ".csv"
    with open(file_path, "wb") as csv_file:
        csv_file.write(response.content)
    print("Downloaded sheet: " + sheet_name)
To:
for sheet in result["sheets"]:
    sheet_name = sheet["properties"]["title"]
    params = {"format": "csv", "gid": sheet["properties"]["sheetId"]}
    query_params = urllib.parse.urlencode(params)
    url = export_url + "?" + query_params
    response = authed_session.get(url)
    file_path = "./Downloads/" + sheet_name + ".csv"
    with open(file_path, "wb") as csv_file:
        csv_file.write(response.content)
    print("Downloaded sheet: " + sheet_name)
    time.sleep(3)  # <--- Added. Please adjust the value of 3 for your actual situation.
In this case, please use import time.
This is my code. I am trying to copy a directory from one bucket to another. Everything appears to succeed, but the files are not appearing in the client's bucket.
import boto3

ACCESS_KEY = 'access_key'
SECRET_KEY = 'secret_key'
REGION_NAME = 'US_EAST_1'

source_bucket = 'source_bucket'
# Make sure you provide / in the end
source_prefix = 'source_prefix'
target_bucket = 'target-bucket'
target_prefix = 'target-prefix'

client = boto3.client('s3')

session_src = boto3.session.Session()
source_s3_r = session_src.resource('s3')

def get_s3_keys(bucket, prefix):
    keys = []
    response = client.list_objects_v2(Bucket=bucket, Prefix=prefix, MaxKeys=100)
    for obj in response['Contents']:
        keys.append(obj['Key'])
    return keys

session_dest = boto3.session.Session(aws_access_key_id=ACCESS_KEY,
                                     aws_secret_access_key=SECRET_KEY)
dest_s3_r = session_dest.resource('s3')

# create a reference to source image
old_obj = source_s3_r.Object(source_bucket, source_prefix)

# create a reference for destination image
new_obj = dest_s3_r.Object(target_bucket, target_prefix)

keys = get_s3_keys(source_bucket, source_prefix)
responses = []

# upload the image to destination S3 object
for filename in keys:
    print("Transferring file {}, {}".format(source_bucket, filename))
    old_obj = source_s3_r.Object(source_bucket, filename)
    response = new_obj.put(Body=old_obj.get()['Body'].read())
    response_code = response['ResponseMetadata']['HTTPStatusCode']
    responses.append(response_code)
    print("File transfer response {}".format(response_code))

distinct_response = list(set(responses))

if len(distinct_response) > 1 or distinct_response[0] != 200:
    print("File could not be transfered to krux bucket. Exiting now")
    exit(1)
else:
    print("File transfer to krux bucket successful")
I am getting a successful response code of 200 but the file is not transferred across.
Srinivas, try this.
I used the S3 Resource object; try the equivalent S3 Client if you want...
import boto3

s3 = boto3.resource('s3')  # added: s3 here is a boto3 S3 resource

bucket = s3.Bucket(bucket_name)  # from_bucket
for osi in bucket.objects.all():
    print(osi)
    copy_source = {
        'Bucket': bucket.name,
        'Key': osi.key
    }
    s3.Bucket('to_bucket').copy(copy_source, osi.key)
Hope it helps..
r0ck
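For reference, a roughly equivalent sketch using the S3 Client interface (my addition, not part of the original answer; bucket and prefix names are placeholders from the question):
import boto3

s3_client = boto3.client('s3')
paginator = s3_client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket='source_bucket', Prefix='source_prefix/'):
    for obj in page.get('Contents', []):
        key = obj['Key']
        print('Copying', key)
        # copy() performs a managed copy between buckets, preserving the key
        s3_client.copy({'Bucket': 'source_bucket', 'Key': key},
                       'target-bucket', key)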
I'm using a combination of the GCS python SDK and google API client to loop through a version-enabled bucket and download specific objects based on metadata.
from google.cloud import storage
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials

def downloadepoch_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()

    for item in response['items']:
        if item['metadata']['epoch'] == restore_epoch:
            print(item['bucket'])
            print(item['name'])
            print(item['metadata']['epoch'])
            print(item['updated'])
            blob = source_bucket.blob(item['name'])
            blob.download_to_filename(
                '/Users/admin/git/data-processing/{}'.format(item))

downloadepoch_objects()
The above function works properly for a blob that is not within a directory (gs://bucketname/test1.txt), since the item name that gets passed in is simply test1.txt. The issue I am running into is when trying to download files from a complex directory tree (gs://bucketname/nfs/media/docs/test1.txt); the item name that gets passed is nfs/media/docs/test1.txt. Is it possible to have the .download_to_filename() method create directories if they are not present?
Below is the working solution. I ended up stripping the path away from the object name and creating the directory structure on the fly. A better way might be, as @Brandon Yarbrough suggested, using 'prefix + response['prefixes'][0]', but I couldn't quite figure that out. Hope this helps others out.
#!/usr/local/bin/python3
from google.cloud import storage
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json
import os
import pathlib

bucket_name = 'test-bucket'
restore_epoch = '1519189202'
restore_location = '/Users/admin/data/'

credentials = GoogleCredentials.get_application_default()
service = discovery.build('storage', 'v1', credentials=credentials)
storage_client = storage.Client()
source_bucket = storage_client.get_bucket(bucket_name)

def listall_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()
    print(json.dumps(response, indent=2))

def listname_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()
    for item in response['items']:
        print(item['name'] + ' Uploaded on: ' + item['updated'] +
              ' Epoch: ' + item['metadata']['epoch'])

def downloadepoch_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()
    try:
        for item in response['items']:
            if item['metadata']['epoch'] == restore_epoch:
                print('Downloading ' + item['name'] + ' from ' +
                      item['bucket'] + '; Epoch= ' + item['metadata']['epoch'])
                print('Saving to: ' + restore_location)
                blob = source_bucket.blob(item['name'])
                path = pathlib.Path(restore_location + r'{}'.format(item['name'])).parent
                if os.path.isdir(path):
                    blob.download_to_filename(restore_location + '{}'.format(item['name']))
                    print('Download complete')
                else:
                    os.mkdir(path)
                    blob.download_to_filename(restore_location + '{}'.format(item['name']))
                    print('Download complete')
    except Exception:
        pass

# listall_objects()
# listname_objects()
downloadepoch_objects()
GCS does not have a notion of "directories," although tools like gsutil do a good job of pretending for convenience. If you want all of the objects under the "nfs/media/docs/" path, you can specify that as a prefix, like so:
request = service.objects().list(
    bucket=bucket_name,
    versions=True,
    prefix='nfs/media/docs/',  # Only show objects beginning like this
    delimiter='/'              # Consider this character a directory marker.
)
response = request.execute()
subdirectories = response['prefixes']
objects = response['items']
Because of the prefix parameter, only objects that begin with 'nfs/media/docs' will be returned in response['items']. Because of the delimiter parameter, "subdirectories" will be returned in response['prefixes']. You can get more details in the Python documentation of the objects.list method.
If you were to use the newer google-cloud Python library, which I'd recommended for new code, the same call would look pretty similar:
from google.cloud import storage

client = storage.Client()
bucket = client.bucket(bucket_name)
iterator = bucket.list_blobs(
    versions=True,
    prefix='nfs/media/docs/',
    delimiter='/'
)
subdirectories = iterator.prefixes
objects = list(iterator)
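If the goal is then to download those objects while recreating the directory structure locally (the original question), a short sketch along these lines should work with the same library (my addition; the local target directory is a placeholder, and bucket_name is reused from above):
import os
from google.cloud import storage

client = storage.Client()
bucket = client.bucket(bucket_name)
for blob in bucket.list_blobs(prefix='nfs/media/docs/'):
    if blob.name.endswith('/'):
        continue  # skip zero-byte "directory" placeholder objects
    local_path = os.path.join('/tmp/restore', blob.name)  # placeholder target dir
    os.makedirs(os.path.dirname(local_path), exist_ok=True)  # create missing parent dirs
    blob.download_to_filename(local_path)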
The following solution worked for me. It recursively downloads all blobs under a path prefix into a model directory at the project root, while maintaining the folder structure.
Multiple blobs are downloaded concurrently.
GCS client version
google-cloud-storage==1.41.1
import logging
import os
from datetime import datetime
from google.cloud import storage
from concurrent.futures import ThreadPoolExecutor

logger = logging.getLogger(__name__)  # added so the timer decorator can log

BUCKET_NAME = "ml-model"

def timer(func):
    def time_wrapper(*arg, **kwargs):
        start = datetime.now()
        func(*arg, **kwargs)
        diff = datetime.now() - start
        logger.info(f"{func.__name__} took {diff.seconds} s and {diff.microseconds//1000} ms")
    return time_wrapper

def fetch_environment() -> str:
    env = os.environ.get("environment", "staging")
    return env

def create_custom_folder(dir_name: str):
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

def fetch_gcs_credential_file_path():
    return os.environ.get("GCS_CREDENTIAL_FILE_PATH")

class GCS:
    def __init__(self):
        cred_file_path = fetch_gcs_credential_file_path()
        self.client = storage.Client.from_service_account_json(cred_file_path)
        self.bucket = self.client.bucket(BUCKET_NAME)

    def download_blob(self, blob):
        filename = blob.name.replace(self.path_prefix, '')
        delimiter_based_splits = filename.split('/')
        if len(delimiter_based_splits) > 1:
            dir_name = "model/" + "/".join(delimiter_based_splits[: len(delimiter_based_splits) - 1])
            create_custom_folder(dir_name)
            blob.download_to_filename(f"{dir_name}/{delimiter_based_splits[-1]}")
        else:
            blob.download_to_filename("model/" + filename)

    @timer
    def download_blobs_multithreaded(self, prefix: str):
        '''
        CREATE FOLDER IF NOT EXISTS
        '''
        create_custom_folder("model")

        blobs = self.bucket.list_blobs(prefix=prefix)
        self.path_prefix = prefix
        with ThreadPoolExecutor() as executor:
            executor.map(self.download_blob, blobs)

def download_model():
    env = fetch_environment()
    folder_path_prefix = f"ml/{env}/{ML_MODEL_NAME}/v1/tf-saved-model/"  # ML_MODEL_NAME is assumed to be defined elsewhere
    gcs = GCS()
    gcs.download_blobs_multithreaded(folder_path_prefix)

if __name__ == '__main__':
    download_model()
I want to create a Python script to back up Google Drive files as a bit of fun / learning, but I am stuck. My script below did work, but it set the created and last-modified dates of all the backed-up files on my local drive to the date they were backed up, and didn't preserve the original created / modified dates as they were on Google Drive.
Here is my script:
from __future__ import print_function
import sys, httplib2, os, datetime, io
from time import gmtime, strftime
from apiclient import discovery
import oauth2client
from oauth2client import client
from oauth2client import tools
from datetime import date

#########################################################################
# Fixing OSX el capitan bug -> AttributeError: 'Module_six_moves_urllib_parse' object has no attribute 'urlencode'
os.environ["PYTHONPATH"] = "/Library/Python/2.7/site-packages"
#########################################################################

CLIENT_SECRET_FILE = 'client_secrets.json'
TOKEN_FILE = "drive_api_token.json"
SCOPES = 'https://www.googleapis.com/auth/drive'
APPLICATION_NAME = 'Drive File API - Python'
OUTPUT_DIR = str(date.today()) + "_drive_backup"

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

def get_credentials():
    home_dir = os.path.expanduser('~')
    credential_dir = os.path.join(home_dir, '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    credential_path = os.path.join(credential_dir, TOKEN_FILE)

    store = oauth2client.file.Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:  # Needed only for compatibility with Python 2.6
            credentials = tools.run(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials

def prepDest():
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
        return True
    return False

def downloadFile(file_name, file_id, file_createdDate, mimeType, service):
    request = service.files().get_media(fileId=file_id)
    if "application/vnd.google-apps" in mimeType:
        if "document" in mimeType:
            request = service.files().export_media(fileId=file_id, mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
            file_name = file_name + ".docx"
        else:
            request = service.files().export_media(fileId=file_id, mimeType='application/pdf')
            file_name = file_name + ".pdf"
    print("Downloading -- " + file_name)
    response = request.execute()
    with open(os.path.join(OUTPUT_DIR, file_name), "wb") as wer:
        wer.write(response)

def listFiles(service):
    def getPage(pageTok):
        return service.files().list(q="mimeType != 'application/vnd.google-apps.folder'",
                                    pageSize=1000, pageToken=pageTok, fields="nextPageToken,files(id,name, createdDate, mimeType)").execute()
    pT = ''; files = []
    while pT is not None:
        results = getPage(pT)
        pT = results.get('nextPageToken')
        files = files + results.get('files', [])
    return files

def main():
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)

    for item in listFiles(service):
        downloadFile(item.get('name'), item.get('id'), item.get('createdDate'), item.get('mimeType'), service)

if __name__ == '__main__':
    main()
To try to get the created date, you can see in the script above that I added createdDate, which looks like some of the metadata I can grab from the file:
https://developers.google.com/drive/v2/reference/files
But I don't know if I am grabbing that metadata correctly, and if so, how I actually assign it to my downloaded file.
EDIT: Really sorry, but I didn't specify an OS; this is for a Mac.
File v2 createdDate renamed in v3 to createdTime
The File reference you linked is for v2, but your code connects to the v3 service. When I ran your code, which uses createdDate from the v2 API, an error occurred (createdDate was an invalid metadata field).
I switched to the v3 File API, which lists the creation time as createdTime, and was able to retrieve the time without error.
File creation time changeable in Windows only
Linux/Unix does not allow setting a file's creation time, but it allows modification to the file's modified and access times via os.utime() (both times required by this function). The Drive API provides createdTime and modifiedTime but nothing for access time (which probably wouldn't make sense there), although the modification time could serve just as well for the access time.
In Windows, the file creation time could be set with win32file.SetFileTime.
Time conversion
Note that the times that are passed to the timestamp functions above are in seconds since epoch. The Drive API returns an ISO 8601 string that we convert to seconds with:
dt = datetime.datetime.strptime(dateTime, "%Y-%m-%dT%H:%M:%S.%fZ")
secs = int(dt.strftime("%s"))
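(A side note of my own, not part of the original answer: strftime("%s") is a platform-specific extension and treats the naive datetime as local time, whereas the Drive timestamps are UTC. A more portable equivalent using the standard library would be:)
import calendar
dt = datetime.datetime.strptime(dateTime, "%Y-%m-%dT%H:%M:%S.%fZ")
secs = calendar.timegm(dt.timetuple())  # interpret the parsed time as UTC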
Modifications
Replace all instances of createdDate with createdTime.
In listFiles() > getPage(), add modifiedTime to metadata fields:
def listFiles(service):
    def getPage(pageTok):
        return service.files().list(q="mimeType != 'application/vnd.google-apps.folder'",
                                    pageSize=1000, pageToken=pageTok, fields="nextPageToken,files(id,name, createdTime, modifiedTime, mimeType)").execute()
In main()'s for-loop, pass modifiedTime to downloadFiles():
downloadFile(item.get('name'), item.get('id'), item.get('createdTime'), item.get('modifiedTime'), item.get('mimeType'), service)
In downloadFiles(), add modifiedTime to parameter list after file_createdTime.
Add these functions to set file timestamps:
def dateToSeconds(dateTime):
    return int(datetime.datetime.strptime(dateTime, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%s"))

def setFileTimestamps(fname, createdTime, modifiedTime):
    ctime = dateToSeconds(createdTime)
    mtime = dateToSeconds(modifiedTime)
    setFileCreationTime(fname, ctime)
    setFileModificationTime(fname, mtime)

def setFileModificationTime(fname, newtime):
    # Set access time to same value as modified time,
    # since Drive API doesn't provide access time
    os.utime(fname, (newtime, newtime))
def setFileCreationTime(fname, newtime):
    """http://stackoverflow.com/a/4996407/6277151"""
    if os.name != 'nt':
        # file creation time can only be changed in Windows
        return

    import pywintypes, win32file, win32con
    wintime = pywintypes.Time(newtime)
    winfile = win32file.CreateFile(
        fname, win32con.GENERIC_WRITE,
        win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE | win32con.FILE_SHARE_DELETE,
        None, win32con.OPEN_EXISTING,
        win32con.FILE_ATTRIBUTE_NORMAL, None)
    win32file.SetFileTime(winfile, wintime, None, None)
    winfile.close()
In downloadFiles(), call setFileTimestamps() right after writing the file (as last line of function):
def downloadFile(file_name, file_id, file_createdTime, modifiedTime, mimeType, service):
    request = service.files().get_media(fileId=file_id)
    if "application/vnd.google-apps" in mimeType:
        if "document" in mimeType:
            request = service.files().export_media(fileId=file_id, mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
            file_name = file_name + ".docx"
        else:
            request = service.files().export_media(fileId=file_id, mimeType='application/pdf')
            file_name = file_name + ".pdf"
    print("Downloading -- " + file_name)
    response = request.execute()

    prepDest()
    fname = os.path.join(OUTPUT_DIR, file_name)
    with open(fname, "wb") as wer:
        wer.write(response)
    setFileTimestamps(fname, file_createdTime, modifiedTime)
GitHub repo