I am trying to set up a Cloud Function to move files between folders inside one bucket in GCP.
Whenever a user loads files into the designated bucket folder, my Cloud Function should move the file to another folder that the big data scripts watch.
The deployment shows as successful, however, files are not moving from the source folder.
I'd appreciate your help.
from google.cloud import storage

def move_file(bucket_name, bucket_Folder, blob_name):
    """Moves a blob from one folder to another with the same name."""
    bucket_name = 'bucketname'
    blob_name = 'filename'

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    source_blob = bucket.blob("Folder1/" + blob_name)

    new_blob = bucket.copy_blob(source_blob, bucket, "Folder2/" + blob_name)
    blob.delete()

    print('Blob {} in bucket {} copied to blob {} .'.format(source_blob.name, bucket.name, new_blob.name))
From the code you provided, the variable blob is not defined anywhere, so the source file won't be deleted. Instead of blob.delete(), change that line to source_blob.delete().
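For reference, here is a minimal sketch of the corrected move with that single change applied (I've also dropped the unused bucket_Folder parameter and the hard-coded name overrides, so the function actually uses its arguments):

from google.cloud import storage

def move_file(bucket_name, blob_name):
    """Moves Folder1/<blob_name> to Folder2/<blob_name> inside the same bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    source_blob = bucket.blob("Folder1/" + blob_name)
    new_blob = bucket.copy_blob(source_blob, bucket, "Folder2/" + blob_name)

    # Delete the blob that was just copied, not an undefined variable.
    source_blob.delete()

    print('Blob {} in bucket {} copied to blob {}.'.format(
        source_blob.name, bucket.name, new_blob.name))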
Also, I assume you are aware that you're "moving" just a single file. If you want to move all files prefixed with Folder1/ to Folder2 you could do something like this instead:
from google.cloud import storage

def move_files(self):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket('bucketname')
    blobs = bucket.list_blobs(prefix='Folder1/')

    for blob in blobs:
        bucket.rename_blob(blob, new_name=blob.name.replace('Folder1/', 'Folder2/'))
For the latter, I reckon that there could be more efficient or better ways to do it.
If you are just moving the object inside the same bucket, you can simply rename the object with the desired path.
In Google Cloud Platform Storage there are no folders, just the illusion of them. Everything after the name of the bucket is part of the name of the object.
Also, I can see many errors in your function. You can use this generic function to move a blob from one folder to another inside of the same bucket:
from google.cloud import storage

def rename_blob(bucket_name, blob_name, new_name):
    """Renames a blob."""
    # bucket_name = "your-bucket-name"
    # blob_name = "folder/myobject"
    # new_name = "newfolder/myobject"

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    new_blob = bucket.rename_blob(blob, new_name)

    print("Blob {} has been renamed to {}".format(blob.name, new_blob.name))
I've written the following code to upload a file to blob storage using Python:
from azure.storage.blob import ContainerClient

blob_service_client = ContainerClient(account_url="https://{}.blob.core.windows.net".format(ACCOUNT_NAME),
                                      credential=ACCOUNT_KEY,
                                      container_name=CONTAINER_NAME)

blob_service_client.upload_blob("my_file.txt", open("my_file.txt", "rb"))
This works fine. I wonder how I can upload an entire folder, with all files and subfolders in it, while keeping the structure of my local folder intact?
After reproducing from my end, I was able to achieve your requirement using the os module. Below is the complete code that worked for me.
import os
from azure.storage.blob import ContainerClient

dir_path = r'<YOUR_LOCAL_FOLDER>'

for path, subdirs, files in os.walk(dir_path):
    for name in files:
        fullPath = os.path.join(path, name)
        print("FullPath : " + fullPath)

        # Strip the local root so the blob name keeps the relative folder structure
        file = fullPath.replace(dir_path, '')
        fileName = file[1:]
        print("File Name :" + fileName)

        # Create a blob client using the local file name as the name for the blob
        blob_service_client = ContainerClient(account_url=ACCOUNT_URL,
                                              credential=ACCOUNT_KEY,
                                              container_name=CONTAINER_NAME)

        print("\nUploading to Azure Storage as blob:\n\t" + fileName)
        blob_service_client.upload_blob(fileName, open(fullPath, "rb"))
Below is the folder structure on my local machine.
├───g.txt
├───h.txt
├───Folder1
│   ├───z.txt
│   ├───y.txt
├───Folder2
│   ├───a.txt
│   ├───b.txt
│   ├───SubFolder1
│   │   ├───c.txt
│   │   ├───d.txt
Hi and thank you for reading.
I'm new to GCP, and I still can't find a solution to my problem.
I've searched many topics but no solution helped me move on.
INPUT INFORMATION
I have files stored in my bucket in Cloud Storage.
These files could have any extension, but I need to select only the .zip files.
I want to write a Python script in App Engine that will find these zip files and unzip them in the same directory in Cloud Storage.
Below is a version of the script, which doesn't work:
from google.cloud import storage
from zipfile import ZipFile

def list_blobs(bucket_name):
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(bucket_name)
    for blob in blobs:
        try:
            with ZipFile(f'{blob.name}', 'r') as zipObj:
                zipObj.extractall()
        except:
            print(f'{blob.name} not supported unzipping')

list_blobs('test_bucket_for_me_05_08_2021')
Output
proxy.txt not supported unzipping
test.zip not supported unzipping
I found a solution; the code below will unzip the zips in the bucket:
from google.cloud import storage
from zipfile import ZipFile
from zipfile import is_zipfile
import io

storage_client = storage.Client()

def unzip_files(bucketname):
    bucket = storage_client.get_bucket(bucketname)
    blobs = storage_client.list_blobs(bucketname)

    for blob in blobs:
        file = bucket.blob(blob.name)
        try:
            zipbytes = io.BytesIO(file.download_as_string())
            if is_zipfile(zipbytes):
                with ZipFile(zipbytes, 'r') as selected_zip:
                    for files_in_zip in selected_zip.namelist():
                        file_in_zip = selected_zip.read(files_in_zip)
                        blob_new = bucket.blob(files_in_zip)
                        blob_new.upload_from_string(file_in_zip)
        except:
            print(f'{blob.name} not supported')

unzip_files("test_bucket_for_me_05_08_2021")
Of course, I will modify this code, but this solution works.
Thanks all for your time and effort
I have an AWS S3 bucket called task-details and a folder called archive so the S3 URI is s3://task-details/archive/ & ARN of arn:aws:s3:::task-details/archive/. I'm trying to use the upload_file method from Python's boto3 package to upload a CSV file to a folder within the S3 bucket.
Below is the method I am using to try to upload data to the folder, but I keep getting a regex error, which makes me think that I can't upload data to a specific folder in a bucket. Does anyone know how to do this?
Method:
import logging
import boto3
from botocore.exceptions import ClientError

def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True
My Code (I've also tried bucket = s3://task-details/archive/ and bucket = task-details/archive/):
upload_file(
    file_name = filepath,
    bucket = "arn:aws:s3:::task-details/archive/",
    object_name = object_name
)
The Error:
Invalid bucket name "arn:aws:s3:::task-details/archive/": Bucket name must match the regex
"^[a-zA-Z0-9.\-_]{1,255}$" or be an ARN matching the regex "^arn:(aws).*:(s3|s3-object-lambda):[a-z\-0-9]+:[0-9]{12}:accesspoint[/:][a-zA-Z0-9\-]{1,63}$|^arn:(aws).*:s3-outposts:[a-z\-0-9]+:[0-9]{12}:outpost[/:][a-zA-Z0-9\-]{1,63}[/:]accesspoint[/:][a-zA-Z0-9\-]{1,63}$"
The API call wants the bucket name or ARN. Your bucket name is task-details and your bucket ARN is arn:aws:s3:::task-details.
You use the Key parameter when calling upload_file to specify the object's key, for example archive/cats/persian.png. Note that the S3 object key is not simply the object/file name but also includes the prefix/folder.
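In terms of the helper from the question, that means passing the bare bucket name and putting the folder prefix in the object name, something like:

upload_file(
    file_name = filepath,
    bucket = "task-details",                 # bucket name only, no ARN and no path
    object_name = "archive/" + object_name   # the "folder" is part of the object key
)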
The problem was that I needed to add the folder path as part of the object_name instead of the bucket.
Fixed Code:
upload_file(
    file_name = filepath,
    bucket = "arn:aws:s3:::task-details",
    object_name = "archive/" + object_name
)
In my case, the other answers didn't resolve the error. I had to change my bucket name from the arn ("arn:aws:s3:::bucket-name") to the bucket name alone ("bucket-name").
Adding a potentially helpful explanation from @jarmod:
Interesting that the current boto3 docs say it’s the bucket name but the error message in the original post above indicated that it could be an ARN. Personally I always use bucket name and maybe the error message above is a red herring. Certainly it seems that ARN is not supported.
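For completeness, a minimal sketch of the direct boto3 call with the bucket name alone (the file and key names here are placeholders):

import boto3

s3_client = boto3.client('s3')

# A plain bucket name avoids the "Invalid bucket name" regex error seen above.
s3_client.upload_file(
    Filename='report.csv',        # placeholder local file
    Bucket='task-details',        # bucket name only, not "arn:aws:s3:::task-details"
    Key='archive/report.csv'      # the folder prefix lives in the key
)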
I was trying to upload the contents of a zip file. Also I wanted to upload to a folder inside the bucket. So this is my working solution.
s3.upload_fileobj(
    Bucket='user-data',
    Key='my_extracted-files/' + file_name,  # file_name is the name of file
    Fileobj=zop  # This is a file obj
)
My guess is that the naming format of the bucket and key params is the same for the upload_file method too.
Note: Don't forget to add Put permissions in your policy.
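To illustrate that note, a minimal policy statement granting the Put permission could look like the sketch below, written as a Python dict (the user-data bucket name is taken from the example above; treat this as an assumption about your setup, not a complete policy):

import json

# Minimal statement allowing PutObject on objects inside the example bucket.
put_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": ["s3:PutObject"],
            "Resource": "arn:aws:s3:::user-data/*"
        }
    ]
}

print(json.dumps(put_policy, indent=2))  # paste the output into the IAM policy editor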
At the following page
https://googlecloudplatform.github.io/google-cloud-python/latest/storage/blobs.html
you can find all the API calls that can be used for Python & Google Cloud Storage. Even the "official" samples on GitHub at
https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/storage/cloud-client/snippets.py
don't have a related example.
Finally, downloading a directory with the same method used for downloading files gives the error:
Error: [Errno 21] Is a directory:
You just have to first list all the files in a directory and then download them one by one:
from google.cloud import storage

bucket_name = 'your-bucket-name'
prefix = 'your-bucket-directory/'
dl_dir = 'your-local-directory/'

storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name=bucket_name)
blobs = bucket.list_blobs(prefix=prefix)  # Get list of files
for blob in blobs:
    filename = blob.name.replace('/', '_')
    blob.download_to_filename(dl_dir + filename)  # Download
blob.name includes the entire directory structure + filename, so if you want the same file name as in the bucket, you might want to extract it first (instead of replacing / with _)
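For example, a small sketch of keeping the original file name (reusing blob and dl_dir from the snippet above):

# 'your-bucket-directory/report.csv' -> 'report.csv'
filename = blob.name.split('/')[-1]
blob.download_to_filename(dl_dir + filename)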
If you want to keep the same directory structure without renaming and also create nested folders, here is a solution for Python 3.5+ based on @ksbg's answer:
from pathlib import Path
from google.cloud import storage

bucket_name = 'your-bucket-name'
prefix = 'your-bucket-directory/'
dl_dir = 'your-local-directory/'

storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name=bucket_name)
blobs = bucket.list_blobs(prefix=prefix)  # Get list of files
for blob in blobs:
    if blob.name.endswith("/"):
        continue
    file_split = blob.name.split("/")
    directory = "/".join(file_split[0:-1])
    Path(directory).mkdir(parents=True, exist_ok=True)
    blob.download_to_filename(blob.name)
Let's say we want to download FINALFOLDER from the storage path: gs://TEST_BUCKET_NAME/FOLDER1/FOLDER2/FINALFOLDER
After downloading, the final path will look like: D:\\my_blob_data\FINALFOLDER
from os import makedirs
from os.path import isdir, basename
from google.cloud import storage

# if your environment was authenticated, the default config will be picked up
storage_client = storage.Client()  # comment this line if you want to use a service account
# uncomment the line below if you have a service account json
# storage_client = storage.Client.from_service_account_json('creds/sa.json')

bucket_name = 'TEST_BUCKET_NAME'
prefix = 'FOLDER1/FOLDER2/FINALFOLDER'  # prefix must be the full path under the bucket
dst_path = 'D:\\my_blob_data'

if not isdir(dst_path):
    makedirs(dst_path)

bucket = storage_client.bucket(bucket_name=bucket_name)
blobs = bucket.list_blobs(prefix=prefix)  # Get list of files
for blob in blobs:
    blob_name = blob.name
    dst_file_name = blob_name.replace('FOLDER1/FOLDER2', dst_path)  # e.g. 'D:\\my_blob_data/FINALFOLDER/...'
    # extract the final directory and create it in the destination path if it does not exist
    dst_dir = dst_file_name.replace('/' + basename(dst_file_name), '')
    if not isdir(dst_dir):
        makedirs(dst_dir)
    # download the blob object
    blob.download_to_filename(dst_file_name)
Using the TensorFlow gfile package, here is a recursive function.
root_dir is the GCS parent folder.
local_base_dir is the parent folder created locally.
import os
import tensorflow as tf

def copy_recursively(root_dir, local_base_dir):
    if tf.io.gfile.exists(local_base_dir):
        tf.io.gfile.rmtree(local_base_dir)
    tf.io.gfile.mkdir(local_base_dir)

    file_list = tf.io.gfile.glob(root_dir + '/**')
    for item in file_list:
        if not tf.io.gfile.isdir(item):
            fname = item.rsplit('/', 1)[-1]
            if not fname.startswith('.'):
                tf.io.gfile.copy(item,
                                 os.path.join(local_base_dir, fname),
                                 overwrite=False)
        else:
            child_dir = item.rsplit('/', 1)[-1]
            full_dir_path = os.path.join(local_base_dir, child_dir)
            print(f"Setting up child directory: {full_dir_path}")
            copy_recursively(item, full_dir_path)

root_dir = 'gs://.../.../..'
local_base_dir = root_dir.rsplit('/', 1)[-1]

copy_recursively(root_dir, local_base_dir)
Refer to this link: https://medium.com/@sandeepsinh/multiple-file-download-form-google-cloud-storage-using-python-and-gcs-api-1dbcab23c44
1 - Add Your Credential Json
2 - List Bucket Items
3 - Download
import logging
import os
from google.cloud import storage

global table_id
global bucket_name

logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)

bucket_name = 'mybucket'
table_id = 'shakespeare'
storage_client = storage.Client.from_service_account_json('/google-cloud/keyfile/service_account.json')

# The "folder" where the files you want to download are
folder = '/google-cloud/download/{}'.format(table_id)
delimiter = '/'
bucket = storage_client.get_bucket(bucket_name)
blobs = bucket.list_blobs(prefix=table_id, delimiter=delimiter)  # List all objects that satisfy the filter.

# Download the file to a destination
def download_to_local():
    logging.info('File download started... Wait for the job to complete.')
    # Create this folder locally if not exists
    if not os.path.exists(folder):
        os.makedirs(folder)
    # Iterating through for loop one by one using API call
    for blob in blobs:
        logging.info('Blobs: {}'.format(blob.name))
        destination_uri = '{}/{}'.format(folder, blob.name)
        blob.download_to_filename(destination_uri)
        logging.info('Exported {} to {}'.format(
            blob.name, destination_uri))

if __name__ == '__main__':
    download_to_local()
I am trying to set up an app where users can download their files stored in an S3 bucket. I am able to set up my bucket, and get the correct file, but it won't download, giving me this error: No such file or directory: 'media/user_1/imageName.jpg'. Any idea why? This seems like a relatively easy problem, but I can't quite seem to get it. I can delete an image properly, so it is able to identify the correct image.
Here's my views.py
from boto.s3.connection import S3Connection
from boto.s3.bucket import Bucket
from boto.s3.key import Key
from django.shortcuts import get_object_or_404, redirect

def download(request, project_id=None):
    conn = S3Connection('AWS_BUCKET_KEY', 'AWS_SECRET_KEY')
    b = Bucket(conn, 'BUCKET_NAME')
    k = Key(b)

    instance = get_object_or_404(Project, id=project_id)  # Project is the app's model
    k.key = 'media/' + str(instance.image)
    k.get_contents_to_filename(str(k.key))
    return redirect("/dashboard/")
The problem is that you are downloading to a local directory that doesn't exist (media/user_1). You need to either:
Create the directory on the local machine first (see the sketch after the snippet below)
Just use the filename rather than a full path
Use the full path, but replace slashes (/) with another character -- this will ensure uniqueness of filename without having to create directories
The last option could be achieved via:
k.get_contents_to_filename(str(k.key).replace('/', '_'))
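If you prefer the first option instead, a minimal sketch (reusing k and instance from the view above) is to create the local directory before downloading:

import os

key_name = 'media/' + str(instance.image)     # e.g. 'media/user_1/imageName.jpg'
k.key = key_name

# Create the local 'media/user_1' directory before writing into it.
os.makedirs(os.path.dirname(key_name), exist_ok=True)
k.get_contents_to_filename(key_name)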
See also: Boto3 to download all files from a S3 Bucket
Downloading files using boto3 is very simple; configure your AWS credentials at the system level before using this code.
import boto3

client = boto3.client('s3')

# if your bucket name is mybucket and the file path is test/abc.txt
# then Bucket='mybucket' and Prefix='test'
resp = client.list_objects_v2(Bucket="<your bucket name>", Prefix="<prefix of the s3 folder>")

for obj in resp['Contents']:
    key = obj['Key']

    # to read the s3 file contents as a string
    response = client.get_object(Bucket="<your bucket name>",
                                 Key=key)
    print(response['Body'].read().decode('utf-8'))

    # to download the file to local
    client.download_file('<your bucket name>', key, key.replace('test', ''))
The replace is there so the file is saved locally under just the S3 file name; if you don't replace it, boto3 will try to save it as 'test/abc.txt'.
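As an alternative sketch that avoids the string replace entirely, you could keep only the file name portion of each key and skip folder placeholder keys (this reuses client and resp from the snippet above):

import os

for obj in resp['Contents']:
    key = obj['Key']
    if key.endswith('/'):
        continue  # skip "folder" placeholder objects
    # 'test/abc.txt' -> 'abc.txt'
    client.download_file('<your bucket name>', key, os.path.basename(key))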
import os
import boto3

s3 = boto3.resource('s3', aws_access_key_id="AKIAxxxxxxxxxxxxJWB",
                    aws_secret_access_key="LV0+vsaxxxxxxxxxxxxxxxxxxxxxry0/LjxZkN")

my_bucket = s3.Bucket('s3testing')

# download file into current directory
for s3_object in my_bucket.objects.all():
    # Need to split s3_object.key into path and file name, else it will give error file not found.
    path, filename = os.path.split(s3_object.key)
    my_bucket.download_file(s3_object.key, filename)