I want to download an image from a blob that is in a container.
I searched and I only found how to download a container, but as I said I do not want to download the whole container and not the whole blob otherwise just an image.
(container/blob/image.png)
this is the code that i found ( to download all the container):
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient
# IMPORTANT: Replace connection string with your storage account connection string
# Usually starts with DefaultEndpointsProtocol=https;...
MY_CONNECTION_STRING = "CONNECTION_STRING"
# Replace with blob container
MY_BLOB_CONTAINER = "name"
# Replace with the local folder where you want files to be downloaded
LOCAL_BLOB_PATH = "Blobsss"
BLOBNAME="test"
class AzureBlobFileDownloader:
def __init__(self):
print("Intializing AzureBlobFileDownloader")
# Initialize the connection to Azure storage account
self.blob_service_client = BlobServiceClient.from_connection_string(MY_CONNECTION_STRING)
self.my_container = self.blob_service_client.get_container_client(MY_BLOB_CONTAINER)
def save_blob(self, file_name, file_content):
# Get full path to the file
download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)
# for nested blobs, create local path as well!
os.makedirs(os.path.dirname(download_file_path), exist_ok=True)
with open(download_file_path, "wb") as file:
file.write(file_content)
def download_all_blobs_in_container(self):
my_blobs = self.my_container.list_blobs()
for blob in my_blobs:
print(blob.name)
bytes = self.my_container.get_blob_client(blob).download_blob().readall()
self.save_blob(blob.name, bytes)
# Initialize class and upload files
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()
Could you please help me ?
thanks you
Related
I'm trying to zip files present in container 'input' and move them to container 'output'.
I'm using python SDK
# connection to blob storage via Azure Python SDK
connection_string = "myConnectionString"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# get container client
input_container = blob_service_client.get_container_client(container="input")
# filename
filename = "document_to_zip.pdf"
# init zip object
zip_filename = "document_zipped.zip"
zip_object = ZipFile(zip_filename, "w")
data = input_container.download_blob(filename).readall()
zip_object.write(data)
# upload blob to results container as .zip file
results_blob = blob_service_client.get_blob_client(container="output",blob=zip_filename)
results_blob.upload_blob(zip_object, overwrite=True)
Get the following error :
Exception: ValueError: stat: embedded null character in path.
More general question : do you think my approach is fine regarding ziping and moving blob from one container to another ?
Thanks
In general, this error occurs when path contains '/' or ' \' in it. Meanwhile I could able to resolve it by removing the zip_object.write(data) line. Also keep in mind that the above-mentioned code works only for a single file in input container with an unsupported content which throws an error when downloaded.
The below code works but gives error when downloaded
from azure.storage.blob import BlobServiceClient
from zipfile import ZipFile
# connection to blob storage via Azure Python SDK
connection_string = "<YOUR_CONNECTION_STRING>"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# get container client
input_container = blob_service_client.get_container_client(container="input")
# filename
filename = "document_to_zip.pdf"
# init zip object
zip_filename = "document_zipped.zip"
zip_object = ZipFile(zip_filename, "w")
data = input_container.download_blob(filename).readall()
# upload blob to results container as .zip file
results_blob = blob_service_client.get_blob_client(container="output",blob=zip_filename)
results_blob.upload_blob(zip_object, overwrite=True)
RESULTS:
Meanwhile you can save a group of files by looping inside the input container and zip them inside output container.
from azure.storage.blob import BlobServiceClient
from zipfile import ZipFile
connection_string = "<Your_CONNECTION_STRING>"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
input_container = blob_service_client.get_container_client(container="input")
generator = input_container.list_blobs()
for blob in generator:
data = input_container.download_blob(blob.name).readall()
results_blob = blob_service_client.get_blob_client(container="output"+"/"+"ZipFolder.zip",blob=blob.name)
results_blob.upload_blob(data, overwrite=True)
RESULTS:
I want to list all the blobs in a container and then ultimately store each blobs contents (each blob stores a csv file) into a data frame, it appears that the blob service client is the easiest way to list all the blobs, and this is what I have:
#!/usr/bin/env python3
import os
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from pathlib import Path
from io import StringIO
import pandas as pd
def main():
connect_str = os.environ['AZURE_CONNECT_STR']
container = os.environ['CONTAINER']
print(connect_str + "\n")
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client(container)
blob_list = container_client.list_blobs()
for blob in blob_list:
print("\t" + blob.name)
if __name__ == "__main__":
main()
However, in the last version of blob storage client there appears to be no method which allows me to get the actual contents of the blob, what code should I be using ? there are other clients in the Python SDK for Azure, but it getting a full list of the blobs in a container using these seems cumbersome.
What you would need to do is create an instance of BlobClient using the container_client and the blob's name. You can then call download_blob method to download the blob.
Something like:
for blob in blob_list:
print("\t" + blob.name)
blob_client = container_client.get_blob_client(blob.name)
blob_client.download(...)
The below code will download a particular blob by giving the blob name
import constants
import os
import tempfile
from azure.storage.blob import BlobServiceClient
temp_dir = tempfile.TemporaryDirectory()
print(temp_dir.name)
Local_path = os.path.join(temp_dir.name, constants.BLOB_NAME)
class AzureBlob:
def __init__(self, CONNECTION_STRING, BLOB_CONTAINER,
BLOB_PATH, BLOB_NAME):
self.blob_service_client = self.activate_blob_service()
self.container_client = self.initialize_container()
self.BLOB_CONTAINER = BLOB_CONTAINER
self.CONNECTION_STRING = CONNECTION_STRING
self.BLOB_PATH = BLOB_PATH
self.BLOB_NAME = BLOB_NAME
# Initialize a BlobServiceClient object
def activate_blob_service(self):
self.blob_service_client = BlobServiceClient.from_connection_string(self.CONNECTION_STRING)
# print(self.CONNECTION_STRING)
return self.blob_service_client
# Initialize a container from its name
def initialize_container(self):
self.container_client = self.blob_service_client.get_container_client(self.BLOB_CONTAINER)
# print(container_client)
return self.container_client
# Download Blob to local
def download_file(self):
with open(Local_path, 'wb+') as f:
f.write(self.container_client.download_blob(os.path.join(self.BLOB_PATH, self.BLOB_NAME)).readall())
return Local_path
# AzureBlob().download_file()
a = AzureBlob(constants.CONNECTION_STRING, constants.BLOB_CONTAINER,
constants.BLOB_PATH, constants.BLOB_NAME)
What iam actualy trying to achieve is to download all blob from a container where blob is in sub directory. I will provide the directory path of the blob and i need all the information inside the directory to be downloaded.
To achieve the above requirement you can try the below workaround to download all the files from your container,
# download_blobs.py
# Python program to bulk download blob files from azure storage
# Uses latest python SDK() for Azure blob storage
# Requires python 3.6 or above
import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient
# IMPORTANT: Replace connection string with your storage account connection string
# Usually starts with DefaultEndpointsProtocol=https;...
MY_CONNECTION_STRING = "REPLACE_THIS"
# Replace with blob container
MY_BLOB_CONTAINER = "myimages"
# Replace with the local folder where you want files to be downloaded
LOCAL_BLOB_PATH = "REPLACE_THIS"
class AzureBlobFileDownloader:
def __init__(self):
print("Intializing AzureBlobFileDownloader")
# Initialize the connection to Azure storage account
self.blob_service_client = BlobServiceClient.from_connection_string(MY_CONNECTION_STRING)
self.my_container = self.blob_service_client.get_container_client(MY_BLOB_CONTAINER)
def save_blob(self,file_name,file_content):
# Get full path to the file
download_file_path = os.path.join(LOCAL_BLOB_PATH, file_name)
# for nested blobs, create local path as well!
os.makedirs(os.path.dirname(download_file_path), exist_ok=True)
with open(download_file_path, "wb") as file:
file.write(file_content)
def download_all_blobs_in_container(self):
my_blobs = self.my_container.list_blobs()
for blob in my_blobs:
print(blob.name)
bytes = self.my_container.get_blob_client(blob).download_blob().readall()
self.save_blob(blob.name, bytes)
# Initialize class and upload files
azure_blob_file_downloader = AzureBlobFileDownloader()
azure_blob_file_downloader.download_all_blobs_in_container()
For more information please refer this blog post & SO THREAD
I'm pretty new to python and fairly stupid. I'm working on a POC to upload blobs to Azure Blob Storage with a BlobSasURL. Below is my code. When I run it, I get the following error
container_name, blob_name = unquote(path_blob[-2]), unquote(path_blob[-1])
IndexError: list index out of range
Code as it is currently
import os
import yaml
from azure.storage.blob import BlobClient
'''
Importing the configs from yaml
This method required the use of a blob SAS URL or Token
Create config.yaml in teh same path as bluppy.py withg the following
if 'account_url' contains the token or shared_access_key, you don't need to add it to the yaml.
---
source_folder: "./blobs"
account_url: "<ProperlyFormattedBlobSaSURLwithcontainerandcredentialincluded>"
container_name: "<container_name>"
'''
#Import configs from yaml
def bluppy_cfg():
cfg_root = os.path.dirname(os.path.abspath(__file__))
with open(cfg_root + "/config.yaml", "r") as yamlfile:
return yaml.load(yamlfile, Loader=yaml.FullLoader)
#Look in source folder for files to upload to storage
def get_blobs_up(dir):
with os.scandir(dir) as to_go:
for thing in to_go:
if thing.is_file() and not thing.name.startswith('.'):
yield thing
# Uploads a blob to Azure Blob Storage Conatiner via BlobSaSURL
def blob_upload(blob_url, container_name, blob_name):
blob_client = BlobClient.from_blob_url(blob_url, container_name, blob_name)
print("Bluppy is uploading a blob")
for file in files:
azbl_client = blob_client.get_blob_client(file.name)
with open(file.path, "rb") as data:
azbl_client.upload_blob(data)
print(f'{file.name} uploaded to blob storage successfully')
config = bluppy_cfg()
blob_name = get_blobs_up(config["source_folder"])
#print(*blob_name)
blob_upload(config["account_url"], config["container_name"], config["blob_name"])
There are files in the folder. When I print(*blob_name) I see the files/blobs in the folder I'm scanning for upload. I am not sure what I am missing and would appreciate any help.
Again, new/stupid coder here, so please be gentle and thanks in advance for your help!
I am trying to read the contents within "clients" blob storage.
As you can see in the attached picture, I have listed the blobs within that container and now I would like to print the contents of the blobs. for example, I trying to print out the contents of a json file called "clients.json"
import os, uuid, json
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
from config import config, store
try:
print("Azure Blob Storage v" + __version__ + " - Python quickstart sample")
configConnectionString = config
blob_service_client = BlobServiceClient.from_connection_string(configConnectionString)
container_client = blob_service_client.get_container_client(store)
# List the blobs in the container
# blob_list = container_client.list_blobs()
# for blob in blob_list:
# print("\t" + blob.name)
# Read the contents within the blob containers
with open('clients.json') as json_file:
data = json.load(json_file)
print(data)
except Exception as ex:
print('Exception:')
print(ex)
this is to show that my code is working when I want to list the blobs in the container
The get_container_client return an instance of ContainerClient class. It has a method called download_blob which can be used to download the blob to the StorageStreamDownloader. You can then use the content_as_text method of the StorageStreamDownloader class to download the contents of the blob, and decode as text.
container_client = blob_service_client.get_container_client(store)
downloader = container_client.download_blob("clients.json")
print(downloader.content_as_text())