I'm trying to zip files present in container 'input' and move them to container 'output'.
I'm using python SDK
# connection to blob storage via Azure Python SDK
connection_string = "myConnectionString"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# get container client
input_container = blob_service_client.get_container_client(container="input")
# filename
filename = "document_to_zip.pdf"
# init zip object
zip_filename = "document_zipped.zip"
zip_object = ZipFile(zip_filename, "w")
data = input_container.download_blob(filename).readall()
zip_object.write(data)
# upload blob to results container as .zip file
results_blob = blob_service_client.get_blob_client(container="output",blob=zip_filename)
results_blob.upload_blob(zip_object, overwrite=True)
Get the following error :
Exception: ValueError: stat: embedded null character in path.
More general question : do you think my approach is fine regarding ziping and moving blob from one container to another ?
Thanks
In general, this error occurs when path contains '/' or ' \' in it. Meanwhile I could able to resolve it by removing the zip_object.write(data) line. Also keep in mind that the above-mentioned code works only for a single file in input container with an unsupported content which throws an error when downloaded.
The below code works but gives error when downloaded
from azure.storage.blob import BlobServiceClient
from zipfile import ZipFile
# connection to blob storage via Azure Python SDK
connection_string = "<YOUR_CONNECTION_STRING>"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# get container client
input_container = blob_service_client.get_container_client(container="input")
# filename
filename = "document_to_zip.pdf"
# init zip object
zip_filename = "document_zipped.zip"
zip_object = ZipFile(zip_filename, "w")
data = input_container.download_blob(filename).readall()
# upload blob to results container as .zip file
results_blob = blob_service_client.get_blob_client(container="output",blob=zip_filename)
results_blob.upload_blob(zip_object, overwrite=True)
RESULTS:
Meanwhile you can save a group of files by looping inside the input container and zip them inside output container.
from azure.storage.blob import BlobServiceClient
from zipfile import ZipFile
connection_string = "<Your_CONNECTION_STRING>"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
input_container = blob_service_client.get_container_client(container="input")
generator = input_container.list_blobs()
for blob in generator:
data = input_container.download_blob(blob.name).readall()
results_blob = blob_service_client.get_blob_client(container="output"+"/"+"ZipFolder.zip",blob=blob.name)
results_blob.upload_blob(data, overwrite=True)
RESULTS:
I want to list all the blobs in a container and then ultimately store each blobs contents (each blob stores a csv file) into a data frame, it appears that the blob service client is the easiest way to list all the blobs, and this is what I have:
#!/usr/bin/env python3
import os
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from pathlib import Path
from io import StringIO
import pandas as pd
def main():
connect_str = os.environ['AZURE_CONNECT_STR']
container = os.environ['CONTAINER']
print(connect_str + "\n")
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client(container)
blob_list = container_client.list_blobs()
for blob in blob_list:
print("\t" + blob.name)
if __name__ == "__main__":
main()
However, in the last version of blob storage client there appears to be no method which allows me to get the actual contents of the blob, what code should I be using ? there are other clients in the Python SDK for Azure, but it getting a full list of the blobs in a container using these seems cumbersome.
What you would need to do is create an instance of BlobClient using the container_client and the blob's name. You can then call download_blob method to download the blob.
Something like:
for blob in blob_list:
print("\t" + blob.name)
blob_client = container_client.get_blob_client(blob.name)
blob_client.download(...)
I want to download an image from a blob that is in a container.
I searched and I only found how to download a container, but as I said I do not want to download the whole container and not the whole blob otherwise just an image.
(container/blob/image.png)
this is the code that i found ( to download all the container):
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient
# IMPORTANT: Replace connection string with your storage account connection string
# Usually starts with DefaultEndpointsProtocol=https;...
MY_CONNECTION_STRING = "CONNECTION_STRING"
# Replace with blob container
MY_BLOB_CONTAINER = "name"
# Replace with the local folder where you want files to be downloaded
LOCAL_BLOB_PATH = "Blobsss"
BLOBNAME="test"
class AzureBlobFileDownloader:
def __init__(self):
print("Intializing AzureBlobFileDownloader")
# Initialize the connection to Azure storage account
self.blob_service_client = BlobServiceClient.from_connection_string(MY_CONNECTION_STRING)
self.my_container = self.blob_service_client.get_container_client(MY_BLOB_CONTAINER)
def save_blob(self, file_name, file_content):
# Get full path to the file
download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)
# for nested blobs, create local path as well!
os.makedirs(os.path.dirname(download_file_path), exist_ok=True)
with open(download_file_path, "wb") as file:
file.write(file_content)
def download_all_blobs_in_container(self):
my_blobs = self.my_container.list_blobs()
for blob in my_blobs:
print(blob.name)
bytes = self.my_container.get_blob_client(blob).download_blob().readall()
self.save_blob(blob.name, bytes)
# Initialize class and upload files
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()
Could you please help me ?
thanks you
I can successfully access the google cloud bucket from my python code running on my PC using the following code.
client = storage.Client()
bucket = client.get_bucket('bucket-name')
blob = bucket.get_blob('images/test.png')
Now I don't know how to retrieve and display image from the "blob" without writing to a file on the hard-drive?
You could, for example, generate a temporary url
from gcloud import storage
client = storage.Client() # Implicit environ set-up
bucket = client.bucket('my-bucket')
blob = bucket.blob('my-blob')
url_lifetime = 3600 # Seconds in an hour
serving_url = blob.generate_signed_url(url_lifetime)
Otherwise you can set the image as public in your bucket and use the permanent link that you can find in your object details
https://storage.googleapis.com/BUCKET_NAME/OBJECT_NAME
Download the image from GCS as bytes, wrap it in BytesIO object to make the bytes file-like, then read in as a PIL Image object.
from io import BytesIO
from PIL import Image
img = Image.open(BytesIO(blob.download_as_bytes()))
Then you can do whatever you want with img -- for example, to display it, use plt.imshow(img).
In Jupyter notebooks you can display the image directly with download_as_bytes:
from google.cloud import storage
from IPython.display import Image
client = storage.Client() # Implicit environment set up
# with explicit set up:
# client = storage.Client.from_service_account_json('key-file-location')
bucket = client.get_bucket('bucket-name')
blob = bucket.get_blob('images/test.png')
Image(blob.download_as_bytes())
Can someone tell me if it is possible to read a csv file directly from Azure blob storage as a stream and process it using Python? I know it can be done using C#.Net (shown below) but wanted to know the equivalent library in Python to do this.
CloudBlobClient client = storageAccount.CreateCloudBlobClient();
CloudBlobContainer container = client.GetContainerReference("outfiles");
CloudBlob blob = container.GetBlobReference("Test.csv");*
Yes, it is certainly possible to do so. Check out Azure Storage SDK for Python
from azure.storage.blob import BlockBlobService
block_blob_service = BlockBlobService(account_name='myaccount', account_key='mykey')
block_blob_service.get_blob_to_path('mycontainer', 'myblockblob', 'out-sunset.png')
You can read the complete SDK documentation here: http://azure-storage.readthedocs.io.
Here's a way to do it with the new version of the SDK (12.0.0):
from azure.storage.blob import BlobClient
blob = BlobClient(account_url="https://<account_name>.blob.core.windows.net"
container_name="<container_name>",
blob_name="<blob_name>",
credential="<account_key>")
with open("example.csv", "wb") as f:
data = blob.download_blob()
data.readinto(f)
See here for details.
One can stream from blob with python like this:
from tempfile import NamedTemporaryFile
from azure.storage.blob.blockblobservice import BlockBlobService
entry_path = conf['entry_path']
container_name = conf['container_name']
blob_service = BlockBlobService(
account_name=conf['account_name'],
account_key=conf['account_key'])
def get_file(filename):
local_file = NamedTemporaryFile()
blob_service.get_blob_to_stream(container_name, filename, stream=local_file,
max_connections=2)
local_file.seek(0)
return local_file
Provide Your Azure subscription Azure storage name and Secret Key as Account Key here
block_blob_service = BlockBlobService(account_name='$$$$$$', account_key='$$$$$$')
This still get the blob and save in current location as 'output.jpg'
block_blob_service.get_blob_to_path('you-container_name', 'your-blob', 'output.jpg')
This will get text/item from blob
blob_item= block_blob_service.get_blob_to_bytes('your-container-name','blob-name')
blob_item.content
I recommend using smart_open.
import os
from azure.storage.blob import BlobServiceClient
from smart_open import open
connect_str = os.environ['AZURE_STORAGE_CONNECTION_STRING']
transport_params = {
'client': BlobServiceClient.from_connection_string(connect_str),
}
# stream from Azure Blob Storage
with open('azure://my_container/my_file.txt', transport_params=transport_params) as fin:
for line in fin:
print(line)
# stream content *into* Azure Blob Storage (write mode):
with open('azure://my_container/my_file.txt', 'wb', transport_params=transport_params) as fout:
fout.write(b'hello world')
Since I wasn't able to find what I needed on this thread, I wanted to follow up on #SebastianDziadzio's answer to retrieve the data without downloading it as a local file, which is what I was trying to find for myself.
Replace the with statement with the following:
from io import BytesIO
import pandas as pd
with BytesIO() as input_blob:
blob_client_instance.download_blob().download_to_stream(input_blob)
input_blob.seek(0)
df = pd.read_csv(input_blob, compression='infer', index_col=0)
Here is the simple way to read a CSV using Pandas from a Blob:
import os
from azure.storage.blob import BlobServiceClient
service_client = BlobServiceClient.from_connection_string(os.environ['AZURE_STORAGE_CONNECTION_STRING'])
client = service_client.get_container_client("your_container")
bc = client.get_blob_client(blob="your_folder/yourfile.csv")
data = bc.download_blob()
with open("file.csv", "wb") as f:
data.readinto(f)
df = pd.read_csv("file.csv")
To Read from Azure Blob
I want to use csv from azure blob storage to openpyxl xlsx
from io import BytesIO
conn_str = os.environ.get('BLOB_CONN_STR')
container_name = os.environ.get('CONTAINER_NAME')
blob = BlobClient.from_connection_string(conn_str, container_name=container_name,
blob_name="YOUR BLOB PATH HERE FROM AZURE BLOB")
data = blob.download_blob()
workbook_obj = openpyxl.load_workbook(filename=BytesIO(data.readall()))
To write in Azure Blob
I struggled lot for this I don't want anyone to do same,
If you are using openpyxl and want to directly write from azure function to blob storage do following steps and you will achieve what you are seeking for.
Thanks. HMU if you need anyhelp.
blob=BlobClient.from_connection_string(conn_str=conString,container_name=container_name, blob_name=r'YOUR_PATH/test1.xlsx')
blob.upload_blob(save_virtual_workbook(wb))
I know this is an old post but if someone wants to do the same.
I was able to access as per below codes
Note: you need to set the AZURE_STORAGE_CONNECTION_STRING which can be obtained from Azure Portal -> Go to your storage -> Settings -> Access keys and then you will get the connection string there.
For Windows:
setx AZURE_STORAGE_CONNECTION_STRING ""
For Linux:
export AZURE_STORAGE_CONNECTION_STRING=""
For macOS:
export AZURE_STORAGE_CONNECTION_STRING=""
import os
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
print(connect_str)
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client("Your Storage Name Here")
try:
print("\nListing blobs...")
# List the blobs in the container
blob_list = container_client.list_blobs()
for blob in blob_list:
print("\t" + blob.name)
except Exception as ex:
print('Exception:')
print(ex)