I am trying to return a directory of images from the server to the client. I'm having a hard time understanding how to format my response so that the client is able to distinguish between the files.
What is the best-practice way to accomplish this? My photos are kept in a tmp folder within the application. Here is my utils.py:
import base64
import os

def get_multipart_fields():
    final = {}
    for file in os.listdir(os.path.dirname(os.path.realpath(__file__)) + '/tmp'):
        try:
            with open(os.path.dirname(os.path.abspath(__file__)) + '/tmp/' + os.path.join(file), "rb") as image_file:
                encoded_string = base64.b64encode(image_file.read())
                final[os.path.join(file)] = { 'filename': encoded_string }
        except Exception as e:
            print e
    return final.to_string()
and my views.py:
@app.route('/download-files', methods=['GET'])
def gallery():
    client = get_client()
    resource = get_resource()
    download_dir(client, resource, 'test-bucket')
    file_names = get_multipart_fields()
    m = MultipartEncoder(
        fields=file_names
    )
    return Response(json.dumps(m), mimetype=m.content_type)
Where am I going wrong with this logic? Any help would be greatly appreciated.
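For reference, a minimal sketch of one way to shape this kind of response: skip the multipart encoder and return a single JSON object keyed by filename, with each value holding the base64-encoded bytes, so the client can tell the files apart by key. This assumes Flask and the same tmp/ folder as above and is purely illustrative, not the asker's code.

import base64
import json
import os

from flask import Flask, Response

app = Flask(__name__)
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')

@app.route('/download-files', methods=['GET'])
def gallery():
    payload = {}
    for name in os.listdir(TMP_DIR):
        # Key each entry by its filename; value is the base64-encoded content.
        with open(os.path.join(TMP_DIR, name), 'rb') as image_file:
            payload[name] = base64.b64encode(image_file.read()).decode('ascii')
    return Response(json.dumps(payload), mimetype='application/json')

The client can then iterate over the JSON keys and decode each value back into an image.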
I have a Google Cloud Function which needs to connect to a URL, get data in the form of CSV files, and store it in a bucket. The Python code is below.
When I test the function it compiles successfully, but it is not working at all. When I checked the log it gives the below-mentioned error:
favt_LnT_acn_blackline_data_pull_func43jttmffma0g Invalid constructor input for AccessSecretVersionRequest: 'projects/gcp-favt-acn-rpt-dev/secrets/blackline_api_key/versions/latest'
Please find the code and suggest.
Thanks,
Vithal
import base64
import logging
import requests
#import pandas as pd
#from pandas import json_normalize
import json
import os
import datetime
from datetime import datetime as dt
import pytz
from google.cloud import storage
from google.cloud import secretmanager


def delete_and_upload_blob(landing_bucket_name,
                           source_file_name,
                           landing_blob_name,
                           retention_bucket_name,
                           file_retention_flag,
                           retn_file_suffix,
                           rpt_last_run_file):
    storage_client = storage.Client()
    bucket = storage_client.bucket(landing_bucket_name)
    blob = bucket.blob(landing_blob_name)
    rpt_last_run_blob = bucket.blob('some.csv')
    retention_bucket = storage_client.bucket(retention_bucket_name)

    if blob.exists(storage_client):
        #Delete the old file
        blob.delete()
        print('File {} is deleted from Cloud Storage before Upload'.format(landing_blob_name))
    else:
        print('No Such File Exists in Storage Bucket to Delete. So, proceeding with Upload')

    #Upload new one
    blob.upload_from_filename(source_file_name)
    print("File {} uploaded to Bucket {} With Name {}.".format(source_file_name, bucket, landing_blob_name))

    if file_retention_flag == 'Y':
        #Copy the last file of the day to retention bucket
        new_file_name = retn_file_suffix + '_' + landing_blob_name
        blob_copy = bucket.copy_blob(blob, retention_bucket, new_file_name)
        print('File {} is copied to Retention Bucket {}'.format(new_file_name, retention_bucket))

    if rpt_last_run_blob.exists(storage_client):
        #Delete the old file
        rpt_last_run_blob.delete()
        print('File {} is deleted from Cloud Storage before Upload'.format(rpt_last_run_blob))
    else:
        print('No Such File Exists in Storage Bucket to Delete. So, proceeding with Upload')

    #Upload new one
    rpt_last_run_blob.upload_from_filename(rpt_last_run_file)
    print("File {} uploaded to Bucket {} With Name {}.".format(rpt_last_run_file, bucket, 'Reports_Latest_Run_time.csv'))


def api_request():
    et = pytz.timezone("US/Eastern")
    current_et_time = dt.now().astimezone(et)
    print('Current ET Time:', current_et_time)
    pt = pytz.timezone("US/Pacific")
    ut = pytz.timezone("UTC")

    blackline_base_url = "https://....com"
    blackline_sts_url = blackline_base_url + "/authorize/connect/token"

    project_id = 'gcp-favt-acn-dev'
    secret_id = '###_api_key'
    secret_client = secretmanager.SecretManagerServiceClient()
    secret_name = secret_client.secret_version_path(project_id, secret_id, 'latest')
    secret_resp = secret_client.access_secret_version(secret_name)
    api_key = secret_resp.payload.data.decode('UTF-8')

    grant_type = 'password'
    scope = '####'
    username = '####'
    payload = 'grant_type=' + grant_type + '&scope=' + scope + '&username=' + username + '&password=' + api_key

    sts_headers = {'Authorization': 'Basic dXBzOk5KXXx2VENsSiEtRw==',
                   'Content-Type': 'application/x-www-form-urlencoded',
                   'Cookie': 'BLSIAPPEN=!bpJj4AOTHPcaqipWtDI6FrozN629M9xYLA/sbM1DWVH+jjuY5fgHVMACha2rIapXRoB7CcqnlaHgBw=='}

    response = requests.request("POST", ###_sts_url, headers=sts_headers, data=payload)

    if response.ok:
        sts_response = response.json()
        access_token = sts_response['access_token']
        print(access_token)

        blackline_rpt_submit_url = ##_base_url + '/api/queryruns'
        rpt_payload = ''
        blackline_rpt_api_headers = {'Authorization': 'Bearer {}'.format(access_token),
                                     'Content-Type': 'text/plain'}
        rpt_resp = requests.request("GET", blackline_rpt_submit_url, headers=blackline_rpt_api_headers, data=rpt_payload)
        print(rpt_resp.text)
        jl = json.loads(rpt_resp.text)

        reports_list = []
        rprts_filename = "tmp_rprts.csv"
        rprts_full_path = os.path.join("/tmp", rprts_filename)
        with open(rprts_full_path, 'w') as f:
            f.write('ReportName,ReportLastRunTime' + '\n')

        hrs = -2
        hrs_to_subtract = datetime.timedelta(hours=hrs)
        two_hrs_ago_time = current_et_time + hrs_to_subtract
        #print(two_hrs_ago_time)#latest_rpt_check_time)
        frmtd_curr_time = two_hrs_ago_time.strftime('%Y-%m-%d %H:%M:%S')
        latest_rpt_check_time = dt.strptime(frmtd_curr_time, '%Y-%m-%d %H:%M:%S')
        print("Latest Report Check Time:", latest_rpt_check_time)

        for each in jl:
            strpd_time = dt.strptime(each['endTime'][0:19], '%Y-%m-%dT%H:%M:%S')
            #print(strpd_time)
            pt_localize = pt.localize(strpd_time)
            #print(pt_localize)
            et_time = pt_localize.astimezone(et)
            #print(et_time)
            frmtd_et_time = et_time.strftime('%Y-%m-%d %H:%M:%S')
            #print(frmtd_et_time)
            cnvrted_endTime = dt.strptime(frmtd_et_time, '%Y-%m-%d %H:%M:%S')
            #print("Report LastRun EndTime:", cnvrted_endTime)
            ut_time = pt_localize.astimezone(ut)
            frmtd_ut_time = ut_time.strftime('%Y-%m-%d %H:%M:%S')
            if cnvrted_endTime > latest_rpt_check_time:
                reports_list.append({each['name']: each['exportUrls'][0]["url"]})
                rpt_last_run = each['name'] + ',' + frmtd_ut_time
                print(rpt_last_run)
                with open(rprts_full_path, 'a') as f:
                    f.write(rpt_last_run + '\n')
                retn_file_suffix = each['endTime'][0:10]
                #print(retn_file_suffix)
                rpt_run_hr = cnvrted_endTime.hour
                #print(rpt_run_hr)

        #############
        print(reports_list)

        for report in reports_list:
            for k in report:
                print(report[k])
                report_fetch_url = blackline_base_url + '/' + report[k]
                print('Report Fetch URL: {}'.format(report_fetch_url))
                filename = "temp_file.csv"
                full_path = os.path.join("/tmp", filename)
                rpt_data = requests.request("GET", report_fetch_url, headers=blackline_rpt_api_headers)
                print(rpt_data.text)
                with open(full_path, 'wb') as tmp_file:
                    tmp_file.write(rpt_data.content)

                #Upload it to Cloud Storage
                landing_bucket_name = "####_dev_landing_bkt" #CHANGE ME
                source_file_name = os.path.join(full_path)
                rpt_last_run_file = os.path.join(rprts_full_path)
                landing_blob_name = '##.csv' #CHANGE ME
                retention_bucket_name = '####_dev_retention_bkt'

                print('file retention check')
                if (rpt_run_hr >= 22):
                    file_retention_flag = 'Y'
                else:
                    file_retention_flag = 'N'
                print(file_retention_flag)

                delete_and_upload_blob(landing_bucket_name,
                                       source_file_name,
                                       landing_blob_name,
                                       retention_bucket_name,
                                       file_retention_flag,
                                       retn_file_suffix,
                                       rpt_last_run_file)

                #Remove the temp file after it is uploaded to Cloud Storage to
                #avoid OOM issues with the Cloud Function.
                os.remove(full_path)

        #Remove the tmp file after upload
        os.remove(rprts_full_path)


#def pacific_to_eastern_conversion(pacific_time, eastern_time):
def main(event, context):
    try:
        if 'data' in event:
            name = base64.b64decode(event['data']).decode('utf-8')
        else:
            name = 'World'
        print('Hello{}',format(name))
        api_request()
    except Exception as e:
        logging.error(e)
The approach you are using will work for Cloud Run but won't work for Cloud Functions.
To make use of secrets in Google Cloud Functions, the steps are as follows:
Make sure the function's runtime service account is granted access to the secret. To use Secret Manager with Cloud Functions, assign the roles/secretmanager.secretAccessor role to the service account associated with your function.
Make the secret accessible to the function. This can be done using either the Google Cloud Console or the gcloud command-line tool.
I exposed the secret as an environment variable (with the name set to "api_key") and accessed it in the code as shown below:
import os
api_key = os.environ.get('api_key')
I hope this answers your question.
Your Cloud Functions service account doesn't have access to Secret Manager. Grant your Cloud Functions service account access on the secret, or on the project (not recommended).
If you don't set a custom service account on your Cloud Functions (which is also not a good practice), the App Engine default service account is used. It follows the pattern <ProjectID>@appspot.gserviceaccount.com.
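For completeness: the "Invalid constructor input for AccessSecretVersionRequest" message quoted in the question is typically what google-cloud-secret-manager 2.x raises when the version name is passed as a bare positional string. A minimal sketch of the call style that the 2.x client expects (project and secret names here are placeholders, not the asker's real values):

from google.cloud import secretmanager

# Sketch, assuming google-cloud-secret-manager >= 2.x; names are placeholders.
client = secretmanager.SecretManagerServiceClient()
name = client.secret_version_path('my-project', 'my-secret', 'latest')
# Pass the name as a keyword argument or inside a request dict, not positionally.
resp = client.access_secret_version(request={"name": name})
api_key = resp.payload.data.decode("UTF-8")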
I am trying to upload an image using AWS Lambda, but my images get corrupted. I tested with a .txt file and it works great. The function checkImage is the handler.
import os
from cgi import FieldStorage
from io import BytesIO


def parse_into_field_storage(fp, ctype, clength):
    fs = FieldStorage(
        fp=fp,
        environ={'REQUEST_METHOD': 'POST'},
        headers={
            'content-type': ctype,
            'content-length': clength
        },
        keep_blank_values=True
    )
    form = {}
    files = {}
    for f in fs.list:
        if f.filename:
            files.setdefault(f.name, []).append(f)
        else:
            form.setdefault(f.name, []).append(f.value)
    return form, files


def checkImage(event, context):
    body_file = BytesIO(bytes(event["body"], "utf-8"))
    form, files = parse_into_field_storage(
        body_file,
        event['headers']['Content-Type'],
        body_file.getbuffer().nbytes
    )
    fileitem = files['file'][0]
    print(fileitem.filename)
    # Test if the file was uploaded
    if fileitem.filename:
        fn = os.path.basename(fileitem.filename)
        open('/tmp/' + fn, 'wb').write(fileitem.file.read())
        message = 'The file "' + fn + '" was uploaded successfully'
    else:
        message = 'No file was uploaded'
    return {
        "statusCode": 200,
        "body": message
    }
When I print the file from Python I get this.
I used diff and the binaries are different. I am thinking that the image file gets parsed or encoded twice, or something like that.
I am guessing I am doing something wrong here:
body_file = BytesIO(bytes(event["body"], "utf-8"))
I have tried different forms like
fp = io.BytesIO(event['body'].encode('utf-8'))
but it is basically the same.
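One thing worth checking, as a hedged sketch rather than a confirmed fix: when API Gateway proxies a binary body to Lambda it can deliver it base64-encoded and set isBase64Encoded on the event, in which case the body needs to be base64-decoded instead of re-encoded as UTF-8:

import base64
from io import BytesIO

# Sketch: honour API Gateway's isBase64Encoded flag before parsing the form.
raw = event["body"]
if event.get("isBase64Encoded"):
    body_bytes = base64.b64decode(raw)   # binary payloads arrive base64-encoded
else:
    body_bytes = raw.encode("utf-8")     # plain-text payloads (e.g. the .txt test)
body_file = BytesIO(body_bytes)

A plain .txt upload would survive the UTF-8 round trip, which would explain why only the images come out corrupted.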
I'm using a combination of the GCS Python SDK and the Google API client to loop through a version-enabled bucket and download specific objects based on metadata.
from google.cloud import storage
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials

def downloadepoch_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()
    for item in response['items']:
        if item['metadata']['epoch'] == restore_epoch:
            print(item['bucket'])
            print(item['name'])
            print(item['metadata']['epoch'])
            print(item['updated'])
            blob = source_bucket.blob(item['name'])
            blob.download_to_filename(
                '/Users/admin/git/data-processing/{}'.format(item))

downloadepoch_objects()
The above function works properly for a blob that is not within a directory (gs://bucketname/test1.txt), since the item that gets passed in is simply test1.txt. The issue I am running into is when trying to download files from a complex directory tree (gs://bucketname/nfs/media/docs/test1.txt); the item that gets passed is nfs/media/docs/test1.txt. Is it possible to have the .download_to_filename() method create directories if they are not present?
Below is the working solution. I ended up stripping the path from the object name and creating the directory structure on the fly. A better way might be, as @Brandon Yarbrough suggested, using prefix + response['prefixes'][0], but I couldn't quite figure that out. Hope this helps others out.
#!/usr/local/bin/python3
from google.cloud import storage
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json
import os
import pathlib

bucket_name = 'test-bucket'
restore_epoch = '1519189202'
restore_location = '/Users/admin/data/'

credentials = GoogleCredentials.get_application_default()
service = discovery.build('storage', 'v1', credentials=credentials)
storage_client = storage.Client()
source_bucket = storage_client.get_bucket(bucket_name)


def listall_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()
    print(json.dumps(response, indent=2))


def listname_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()
    for item in response['items']:
        print(item['name'] + ' Uploaded on: ' + item['updated'] +
              ' Epoch: ' + item['metadata']['epoch'])


def downloadepoch_objects():
    request = service.objects().list(
        bucket=bucket_name,
        versions=True
    )
    response = request.execute()
    try:
        for item in response['items']:
            if item['metadata']['epoch'] == restore_epoch:
                print('Downloading ' + item['name'] + ' from ' +
                      item['bucket'] + '; Epoch= ' + item['metadata']['epoch'])
                print('Saving to: ' + restore_location)
                blob = source_bucket.blob(item['name'])
                path = pathlib.Path(restore_location + r'{}'.format(item['name'])).parent
                if os.path.isdir(path):
                    blob.download_to_filename(restore_location + '{}'.format(item['name']))
                    print('Download complete')
                else:
                    os.mkdir(path)
                    blob.download_to_filename(restore_location + '{}'.format(item['name']))
                    print('Download complete')
    except Exception:
        pass


# listall_objects()
# listname_objects()
downloadepoch_objects()
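As a side note, the directory check in downloadepoch_objects() above can be collapsed into a single call. A minimal sketch of the same per-object download step, reusing the item, blob, and restore_location names from the code above, purely as an illustration:

import os
import pathlib

# Sketch: create any missing parent directories in one call, then download.
target = os.path.join(restore_location, item['name'])
pathlib.Path(target).parent.mkdir(parents=True, exist_ok=True)
blob.download_to_filename(target)

mkdir(parents=True) also avoids the failure os.mkdir() would hit when more than one level of the tree is missing.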
GCS does not have a notion of "directories," although tools like gsutil do a good job of pretending for convenience. If you want all of the objects under the "nfs/media/docs/" path, you can specify that as a prefix, like so:
request = service.objects().list(
    bucket=bucket_name,
    versions=True,
    prefix='nfs/media/docs/',  # Only show objects beginning like this
    delimiter='/'              # Consider this character a directory marker.
)
response = request.execute()
subdirectories = response['prefixes']
objects = response['items']
Because of the prefix parameter, only objects that begin with 'nfs/media/docs' will be returned in response['items']. Because of the delimiter parameter, "subdirectories" will be returned in response['prefixes']. You can get more details in the Python documentation of the objects.list method.
If you were to use the newer google-cloud Python library, which I'd recommend for new code, the same call would look pretty similar:
from google.cloud import storage

client = storage.Client()
bucket = client.bucket(bucket_name)
iterator = bucket.list_blobs(
    versions=True,
    prefix='nfs/media/docs/',
    delimiter='/'
)
subdirectories = iterator.prefixes
objects = list(iterator)
The following solution worked for me. It recursively downloads all blobs under a path prefix into a model directory at the project root, while maintaining the folder structure.
Multiple blobs are downloaded concurrently.
GCS client version
google-cloud-storage==1.41.1
import logging
import os
from datetime import datetime
from google.cloud import storage
from concurrent.futures import ThreadPoolExecutor

logger = logging.getLogger(__name__)  # module-level logger used by the timer decorator

BUCKET_NAME = "ml-model"


def timer(func):
    def time_wrapper(*arg, **kwargs):
        start = datetime.now()
        func(*arg, **kwargs)
        diff = datetime.now() - start
        logger.info(f"{func.__name__} took {diff.seconds} s and {diff.microseconds//1000} ms")
    return time_wrapper


def fetch_environment() -> str:
    env = os.environ.get("environment", "staging")
    return env


def create_custom_folder(dir_name: str):
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)


def fetch_gcs_credential_file_path():
    return os.environ.get("GCS_CREDENTIAL_FILE_PATH")


class GCS:
    def __init__(self):
        cred_file_path = fetch_gcs_credential_file_path()
        self.client = storage.Client.from_service_account_json(cred_file_path)
        self.bucket = self.client.bucket(BUCKET_NAME)

    def download_blob(self, blob):
        filename = blob.name.replace(self.path_prefix, '')
        delimiter_based_splits = filename.split('/')
        if len(delimiter_based_splits) > 1:
            dir_name = "model/" + "/".join(delimiter_based_splits[: len(delimiter_based_splits) - 1])
            create_custom_folder(dir_name)
            blob.download_to_filename(f"{dir_name}/{delimiter_based_splits[-1]}")
        else:
            blob.download_to_filename("model/" + filename)

    @timer
    def download_blobs_multithreaded(self, prefix: str):
        '''
        CREATE FOLDER IF NOT EXISTS
        '''
        create_custom_folder("model")
        blobs = self.bucket.list_blobs(prefix=prefix)
        self.path_prefix = prefix
        with ThreadPoolExecutor() as executor:
            executor.map(self.download_blob, blobs)


def download_model():
    env = fetch_environment()
    folder_path_prefix = f"ml/{env}/{ML_MODEL_NAME}/v1/tf-saved-model/"
    gcs = GCS()
    gcs.download_blobs_multithreaded(folder_path_prefix)


if __name__ == '__main__':
    download_model()
I built a class that watches for changes in a directory and uploads them to a server, and it works fine for one directory. However, I had the idea of using Python's threading module to watch more than one directory. But I am getting confused: when I change a file in one location, it uploads just fine, but then the OTHER location starts uploading all of its files. I think it's because somehow the threads are sharing the same variable or something, but that seems impossible because each directory has its own instance of the class working specifically for it.
Here's some code:
import os, ftplib, time
from threading import Thread

class FTPSync(Thread):
    local_root = ''
    remote_root = ''
    host = ''
    user = ''
    password = ''
    content = {
        'previous': [],
        'current': []
    }
    ignore = []
    rest = 0.5
    files = []
    cwd = ''
    watching = True

    def __init__(self, local_root='', remote_root='', config={}):
        Thread.__init__(self)
        self.local_root = local_root if local_root != '' else os.path.join(os.path.dirname(__file__), os.pardir)
        self.remote_root = remote_root
        self.ignore = config['ignore'] if 'ignore' in config else []
        self.rest = config['rest'] if 'rest' in config else 0.5
        self.host, self.user, self.password = config['host'], config['user'], config['password']
        self.content['previous'] = self.read_dir(self.local_root)

    # Connect and reconnect to the server
    def connect(self, reconnect=False):
        print "Connecting..."
        self.ftp = ftplib.FTP(self.host)
        self.ftp.login(self.user, self.password)
        print "Welcome message from server:\n"
        print self.ftp.getwelcome()
        if not reconnect:
            self.cwd = self.remote_root
        self.ftp.cwd(self.cwd)

    # Start watching for local changes
    def watch(self):
        self.connect()
        while self.watching:
            self.files = []
            self.content['current'] = self.read_dir(self.local_root)
            diff = [f for f in self.content['current'] if f not in self.content['previous']]
            if len(diff) > 0:
                self.stor(diff)
                self.content['previous'] = self.content['current']
                diff = []
            time.sleep(self.rest)

    # Read a directory and its contents recursively
    def read_dir(self, dir_name, return_value=True):
        reading = os.listdir(dir_name)
        file_content = None
        for i in range(len(reading)):
            d = self._local_abspath(dir_name, reading[i])
            is_dir = os.path.isdir(d)
            file_content = open(d).read() if not is_dir else None
            offset = d.replace(self.local_root, '').replace(reading[i], '')
            if is_dir and reading[i] not in self.ignore:
                self.read_dir(d, return_value=False)
            elif not is_dir:
                info = {"name": reading[i], "content": file_content, "local_path": d, "offset": offset}
                self.files.append(info)
        if (return_value):
            return self.files
        pass

    # Here we go
    def run(self):
        self.watch()

    # Store (STOR) the files in the server
    def stor(self, files):
        nav = ''
        try:
            for f in files:
                if self._server_abspath(f['offset']) != self.cwd:
                    nav = self._server_abspath(f['offset'])
                    self.ftp.cwd(nav)
                mode = ''
                if f['name'].split('.')[-1:][0] in ['jpg', 'png', 'gif'] or os.path.getsize(f['local_path']) > 8190:
                    mode = 'binary'
                    self.ftp.storbinary('STOR {!s}'.format(f['name']), open(f['local_path']))
                else:
                    mode = 'ascii'
                    self.ftp.storlines('STOR {!s}'.format(f['name']), open(f['local_path']))
                self.cwd = self._server_abspath(f['offset'])
                print "Stored %s in %s mode" % (f['name'], mode)
        # The connection has timed out
        except ftplib.error_temp:
            self.connect(reconnect=True)
            self.stor(files)
        # A new file has been created inside a folder that does not exist on the server
        except ftplib.error_perm:
            self.ftp.mkd(nav)
            self.stor(files)
        # A new folder has been created locally, but we'll wait to update this on the server
        # until there's some content inside of it and it throws us a ftplib.error_perm error, so here it'll just pass
        except IOError:
            pass

    # Return the absolute path on the server
    def _server_abspath(self, path):
        return self.remote_root + '/' + path.replace('\\', '/')

    # Return the absolute path locally
    def _local_abspath(self, dn, fn):
        return (dn + '\\' + fn) if not dn[-1:] == '\\' else dn + fn


def start(local_root='', remote_root='', config={}):
    instance = FTPSync(local_root, remote_root, config)
    instance.start()
    return instance
And this is how I use the class:
import ftpsync

config = {
    'host': 'ftp.myhost.com',
    'user': '****',
    'password': '****',
    'ignore': ['.git']
}

ftpsync.start(remote_root='/www/tst', config=config)
ftpsync.start(local_root='C:\\pygames', remote_root='/www/tst', config=config)
I would like to reiterate that it works fine for ONE directory.
After some time, I realized I had to use processes. I came back here in case someone finds this useful.
So basically, with threads you're running two or more concurrent things at once, but they all share the same address space and memory, which can cause unwanted behaviour because they have the same context and can interact with each other.
With processes, every process is independent of the others, so each one has its own resources reserved. They don't share variables and state.
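For what it's worth, a minimal sketch of launching the two watchers as separate processes instead of threads, assuming the same FTPSync class and config dict from the question (the __main__ guard matters on Windows):

from multiprocessing import Process

def watch_directory(local_root='', remote_root='', config={}):
    # Each process builds its own FTPSync instance and runs the watch loop.
    FTPSync(local_root, remote_root, config).watch()

if __name__ == '__main__':
    Process(target=watch_directory,
            kwargs={'remote_root': '/www/tst', 'config': config}).start()
    Process(target=watch_directory,
            kwargs={'local_root': 'C:\\pygames', 'remote_root': '/www/tst', 'config': config}).start()

Because each process gets its own copy of the class and its mutable class attributes, the two watchers can no longer interfere with each other's file lists.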
I am using the python-instagram API and I am displaying some images for a searched tag.
Rather than just displaying them, I want to save those images so that they can be used for further analysis.
Is this possible? I am new to Python and to using APIs.
Here is my code snippet which does this:
@route('/tag_search')
def tag_search(session):
    access_token = session.get('access_token')
    content = "<h2>Tag Search</h2>"
    if not access_token:
        return 'Missing Access Token'
    try:
        api = client.InstagramAPI(access_token=access_token)
        tag_search, next_tag = api.tag_search(q="catband")
        tag_recent_media, next = api.tag_recent_media(tag_name=tag_search[0].name)
        photos = []
        for tag_media in tag_recent_media:
            photos.append('<img src="%s"/>' % tag_media.get_standard_resolution_url())
        content += ''.join(photos)
    except Exception, e:
        print e
Thanks in advance :)
After some help from the comments and other resources, I found out that since I have the URL of the image, I can use it to download the file.
The library used was urllib.
I used a counter variable to save the images in the same directory as the script, named 1.jpg, 2.jpg, and so on.
Here is the modified code:
import urllib

@route('/tag_search')
def tag_search(session):
    access_token = session.get('access_token')
    content = "<h2>Tag Search</h2>"
    if not access_token:
        return 'Missing Access Token'
    try:
        api = client.InstagramAPI(access_token=access_token)
        tag_search, next_tag = api.tag_search(q="selfie")
        tag_recent_media, next = api.tag_recent_media(tag_name=tag_search[0].name)
        photos = []
        count = 0
        for tag_media in tag_recent_media:
            photos.append('<img src="%s"/>' % tag_media.get_standard_resolution_url())
            urllib.urlretrieve(tag_media.get_standard_resolution_url(), `count` + ".jpg")
            count = count + 1
        content += ''.join(photos)
    except Exception, e:
        print e
Hope this Helps:)