How to upload the a folder into Azure blob storage while preserving the structure in Python? - python

I've wrote the following code to upload a file into blob storage using Python:
blob_service_client = ContainerClient(account_url="https://{}.blob.core.windows.net".format(ACCOUNT_NAME),
credential=ACCOUNT_KEY,
container_name=CONTAINER_NAME)
blob_service_client.upload_blob("my_file.txt", open("my_file.txt", "rb"))
this works fine. I wonder how can I upload the entire folder with all files and sub folders in it while keeping the structure of my local folder intact?

After reproducing from my end I could able to achieve your requirement using os module. Below is the complete code that worked for me.
dir_path = r'<YOUR_LOCAL_FOLDER>'
for path, subdirs, files in os.walk(dir_path):
for name in files:
fullPath=os.path.join(path, name)
print("FullPath : "+fullPath)
file=fullPath.replace(dir_path,'')
fileName=file[1:len(file)];
print("File Name :"+fileName)
# Create a blob client using the local file name as the name for the blob
blob_service_client = ContainerClient(account_url=ACCOUNT_URL,
credential=ACCOUNT_KEY,
container_name=CONTAINER_NAME)
print("\nUploading to Azure Storage as blob:\n\t" + fileName)
blob_service_client.upload_blob(fileName, open(fullPath, "rb"))
Below is the folder structure in my local.
├───g.txt
├───h.txt
├───Folder1
├───z.txt
├───y.txt
├───Folder2
├───a.txt
├───b.txt
├───SubFolder1
├───c.txt
├───d.txt
RESULTS:

Related

Google API/Python: Delete old files in Google Drive by filename in local folder, upload new filenames from local folder

I have a problem witch was hard to write in the title. I have this script with a lot of help from #Tanaike . This script is doing basically two things:
Deletes files from Google Drive folder by filenames which are in local folder CSVtoGD (using spreadsheet ID's)
then:
Upload list of CSV from local folder "CSVtoGD" to Google Drive folder
I have a big problem now and can not work it out. The script is deleting old files in google drive when there are the same filenames in CSVtoGD. When I add new file to local folder CSVtoGD, there is a error "list index out of range" and I got printed "No files found" like in the script. I was trying to make some modification but it was blind shoots. What I want this script to do is to delete from Google Drive folder ONLY files which are in local CSVtoGD folder and work on with rest of the files in CSVtoGD (just upload them). Anyone have some answer to that? Thank you :)
import gspread
import os
from googleapiclient.discovery import build
gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)
def getSpreadsheetId(filename):
q = "name='" + filename + "' and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True).execute()
items = res.get("files", [])
if not items:
print("No files found.")
exit()
return items[0]["id"]
os.chdir('/users/user/CSVtoGD2')
files = os.listdir()
for filename in files:
fname = filename.split(".")
if fname[1] == "csv":
folder_id = '1z_pUvZyt5AoTNy-aKCKLmlNjdR2OPo'
oldSpreadsheetId = getSpreadsheetId(fname[0])
#print(oldSpreadsheetId)
sh = gc.del_spreadsheet(oldSpreadsheetId)
**# IF there are the same filenames in CSVtoGD folder on my Mac
#and the same filenames on Google Drive folder,
#those lines works well.
#Problem is when there are new files in CSVtoGD local folder on Mac.**
sh = gc.create(fname[0], folder_id)
content = open(filename, "r").read().encode("utf-8")
gc.import_csv(sh.id, content)
I believe your goal is as follows.
For example, when sample.csv is existing on your local PC and a Spreadsheet of sample is existing in your Google Drive, you want to delete the Spreadsheet of sample from your Google Drive.
When sample1.csv is existing on your local PC and the Spreadsheet of sample1 is NOT existing in your Google Drive, you want to upload sample1.csv to Google Drive.
In this case, how about the following modification?
Modified script:
import gspread
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
gc = gspread.oauth(credentials_filename='/users/user/credentials.json')
service = build("drive", "v3", credentials=gc.auth)
folder_id = '1z_pUvZyt5AoTNy-aKCKLmlNjdR2OPo' # Please set the folder ID you want to upload the file.
def getSpreadsheetId(filename, filePath):
q = "name='" + filename + "' and mimeType='application/vnd.google-apps.spreadsheet' and trashed=false"
res = service.files().list(q=q, fields="files(id)", corpora="allDrives", includeItemsFromAllDrives=True, supportsAllDrives=True).execute()
items = res.get("files", [])
if not items:
print("No files found.")
file_metadata = {
"name": filename,
"parents": [folder_id],
"mimeType": "application/vnd.google-apps.spreadsheet",
}
media = MediaFileUpload(filePath + "/" + filename + ".csv")
file = service.files().create(body=file_metadata, media_body=media, fields="id").execute()
id = file.get("id")
print("File was uploaded. The file ID is " + id)
exit()
return items[0]["id"]
filePath = '/users/user/CSVtoGD2'
os.chdir(filePath)
files = os.listdir()
for filename in files:
fname = filename.split(".")
if fname[1] == "csv":
oldSpreadsheetId = getSpreadsheetId(fname[0], filePath)
print(oldSpreadsheetId)
sh = gc.del_spreadsheet(oldSpreadsheetId)
sh = gc.create(fname[0], folder_id)
content = open(filename, "r").read().encode("utf-8")
gc.import_csv(sh.id, content)
When this script is run, the above flow is run.
Note:
In this modification, the CSV file is uploaded as a Google Spreadsheet. From your question, I thought that this might be your expected result. But, if you want to upload the CSV file as the CSV file, please remove "mimeType": "application/vnd.google-apps.spreadsheet", from file_metadata.
If an error related to the scope, please add the scope of https://www.googleapis.com/auth/drive and authorize the scopes again and test it again.
Reference:
Upload file data

Upload folder in Blob Storage with SAS URI

I have to upload a set of folders into a dedicated container in Azure Blob Storage.
I found this:
https://github.com/rahulbagal/upload-file-azure-sas-url
but it is just for uploading a file using a dedicated Blob SAS URI, and it works perfectly.
Is there any similar solution able to manage folder upload instead of a file upload?
Thank you in advanced
1. Please try to use this code:
import os
from azure.storage.blob import BlobServiceClient
account_url = "https://<storage-account-name>.blob.core.windows.net/"
sas_token = "<your-sas-token>"
blob_service_client = BlobServiceClient(account_url, sas_token)
container_name = "<your-container-name>"
container_client = blob_service_client.get_container_client(container_name)
local_path = "<your-folder-path>"
folder_name = "<your-folder-name>"
for files in os.listdir(local_path):
with open(os.path.join(local_path,files), "rb") as data:
blob_client = blob_service_client.get_blob_client(container=container_name, blob= folder_name + "/" + files)
blob_client.upload_blob(data)
2. Or you can use azcopy to upload your folder:
For example:
azcopy copy '<folder-path>' 'https://<account-name>.blob.core.windows.net/<container-name>?<sas-token>' --recursive
For more details, you can refer to this official documentation.

Unzip Password Protected Zip file automatically from azure storage?

I'm just wondering is there a way to extract a password protected zip file from Azure Storage.
I tried using a python Azure Function to no avail but had a problem reading the location of the file.
Would the file have to stored on a shared location temporarily in order to achieve?
Just looking for a bit of direction here am I missing a step maybe?
Regards,
James
Azure blob storage provides storing functionality only, there is no running env to perform unzip operation. So basically, we should download .zip file to Azure function, unzip it and upload files in .zip file 1 by 1.
For a quick test, I write an HTTP trigger Azure function demo that unzipping a zip file with password-protected, it works for me on local :
import azure.functions as func
import uuid
import os
import shutil
from azure.storage.blob import ContainerClient
from zipfile import ZipFile
storageAccountConnstr = '<storage account conn str>'
container = '<container name>'
#define local temp path, on Azure, the path is recommanded under /home
tempPathRoot = 'd:/temp/'
unZipTempPathRoot = 'd:/unZipTemp/'
def main(req=func.HttpRequest) -> func.HttpResponse:
reqBody = req.get_json()
fileName = reqBody['fileName']
zipPass = reqBody['password']
container_client = ContainerClient.from_connection_string(storageAccountConnstr,container)
#download zip file
zipFilePath = tempPathRoot + fileName
with open(zipFilePath, "wb") as my_blob:
download_stream = container_client.get_blob_client(fileName).download_blob()
my_blob.write(download_stream.readall())
#unzip to temp folder
unZipTempPath = unZipTempPathRoot + str(uuid.uuid4())
with ZipFile(zipFilePath) as zf:
zf.extractall(path=unZipTempPath,pwd=bytes(zipPass,'utf8'))
#upload all files in temp folder
for root, dirs, files in os.walk(unZipTempPath):
for file in files:
filePath = os.path.join(root, file)
destBlobClient = container_client.get_blob_client(fileName + filePath.replace(unZipTempPath,''))
with open(filePath, "rb") as data:
destBlobClient.upload_blob(data,overwrite=True)
#remove all temp files
shutil.rmtree(unZipTempPath)
os.remove(zipFilePath)
return func.HttpResponse("done")
Files in my container:
Result:
Using blob triggers will be better to do this as it will cause time-out errors if the size of your zip file is huge.
Anyway, this is only a demo that shows you how to do this.

Create folder in mounted blob

I mount my blob container in azure databricks
I can read my data from blob 'input'
I do transformation on my data and store them in blob "output"
problem: when i mount the contrainer, "output" blob does not exist
So when i try to write my output data I have error message
No such file or directory: '/dbfs/mnt/data/output/file_name.xlsx'
with /dbfs/mnt/data mounted conteners
Edit:
i'm using a strange way to write my data, I use follwing function:
def creation(df_MA,file,file_path):
df_MA.to_excel(file)
shutil.copy2(file,file_path)
#copyfile(file, file_path)
os.remove(file)
with df_MA: created file
file: file name
file_path: /dbfs/mnt/data/output/file_name.xlsx
the function write my df_MA in databricks root folder of data brick then it's suposed to move it in my path
Edit2:
mouting blob
mount_point="/mnt/data/"
dbutils.fs.mount(
source = source,
mount_point = mount_point,
extra_configs = {"secret": dbutils.secrets.get(scope = "keyvault", key = "key")})

Download S3 Files with Boto

I am trying to set up an app where users can download their files stored in an S3 Bucket. I am able to set up my bucket, and get the correct file, but it won't download, giving me the this error: No such file or directory: 'media/user_1/imageName.jpg' Any idea why? This seems like a relatively easy problem, but I can't quite seem to get it. I can delete an image properly, so it is able to identify the correct image.
Here's my views.py
def download(request, project_id=None):
conn = S3Connection('AWS_BUCKET_KEY', 'AWS_SECRET_KEY')
b = Bucket(conn, 'BUCKET_NAME')
k = Key(b)
instance = get_object_or_404(Project, id=project_id)
k.key = 'media/'+str(instance.image)
k.get_contents_to_filename(str(k.key))
return redirect("/dashboard/")
The problem is that you are downloading to a local directory that doesn't exist (media/user1). You need to either:
Create the directory on the local machine first
Just use the filename rather than a full path
Use the full path, but replace slashes (/) with another character -- this will ensure uniqueness of filename without having to create directories
The last option could be achieved via:
k.get_contents_to_filename(str(k.key).replace('/', '_'))
See also: Boto3 to download all files from a S3 Bucket
Downloading files using boto3 is very simple, configure your AWS credentials at system level before using this code.
client = boto3.client('s3')
// if your bucket name is mybucket and the file path is test/abc.txt
// then the Bucket='mybucket' Prefix='test'
resp = client.list_objects_v2(Bucket="<your bucket name>", Prefix="<prefix of the s3 folder>")
for obj in resp['Contents']:
key = obj['Key']
//to read s3 file contents as String
response = client.get_object(Bucket="<your bucket name>",
Key=key)
print(response['Body'].read().decode('utf-8'))
//to download the file to local
client.download_file('<your bucket name>', key, key.replace('test',''))
replace is to locate the file in your local with s3 file name, if you don't replace it will try to save as 'test/abc.txt'.
import os
import boto3
import json
s3 = boto3.resource('s3', aws_access_key_id="AKIAxxxxxxxxxxxxJWB",
aws_secret_access_key="LV0+vsaxxxxxxxxxxxxxxxxxxxxxry0/LjxZkN")
my_bucket = s3.Bucket('s3testing')
# download file into current directory
for s3_object in my_bucket.objects.all():
# Need to split s3_object.key into path and file name, else it will give error file not found.
path, filename = os.path.split(s3_object.key)
my_bucket.download_file(s3_object.key, filename)

Categories