I am using the Python SDK for this and want to move a blob from one container to another, but the lease keeps causing problems, even after I break it.
from azure.storage.blob import BlobLeaseClient, BlobServiceClient
from app.models.remediation import RemediationRequest, RemediationType
from app.shared.azure_storage_client import AzureStorageClient
def remediate(self, remediation_request: RemediationRequest, account: dict,
              file_object_metadata: dict, destination_bucket_name: str):
    file_type = file_object_metadata["file_type"]
    storage_client = AzureStorageClient(account_name=key, account_key=Value)
    if file_object_metadata['lease']['status'] == 'locked':
        connection_string = storage_client._get_connection_string()
        blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        container_client = blob_service_client.get_container_client(source_bucket)
        blob_client = container_client.get_blob_client(blob_name)
        break_lease_result = BlobLeaseClient(blob_client).break_lease()
    storage_client.move_blob(blob_name, source_bucket,
                             destination_bucket_name, destination_blob_name, file_type)
The requirement: the blob should be moved with the specified lease ID; otherwise, break the lease and then move it.
I tried this in my environment and got the results below.
In my environment I have two containers, named Test and Test1.
In the test container I have a blob in a leased state and another in a normal state.
With the code below, the lease on the blob is broken and the file is successfully copied from one container to the other.
Code:
from azure.storage.blob import BlobLeaseClient, BlobServiceClient

connect_strng = "<connect string>"
source_blob = "https://<storage acc name>.blob.core.windows.net/test/file.json"

blob_service_client = BlobServiceClient.from_connection_string(connect_strng)
blob_client = blob_service_client.get_blob_client("test", "file.json")

# Break the active lease on the source blob so the copy is not blocked.
BlobLeaseClient(blob_client).break_lease()

# Start a server-side copy into the destination container and check its status.
copied_blob = blob_service_client.get_blob_client("test1", 'file.json')
copy = copied_blob.start_copy_from_url(source_blob)
props = copied_blob.get_blob_properties()
print(props.copy.status)
Reference:
azure.storage.blob.BlobLeaseClient class | Microsoft Learn
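If you want a full move rather than just a copy, here is a minimal sketch under the same assumptions (same storage account, the connection string above, illustrative container and blob names): break the lease only when the blob is actually locked, wait for the server-side copy to finish, and then delete the source blob.
import time
from azure.storage.blob import BlobLeaseClient, BlobServiceClient

connect_strng = "<connect string>"
blob_service_client = BlobServiceClient.from_connection_string(connect_strng)

source_blob = blob_service_client.get_blob_client("test", "file.json")
dest_blob = blob_service_client.get_blob_client("test1", "file.json")

# Break the lease only if the source blob is actually leased.
if source_blob.get_blob_properties().lease.status == "locked":
    BlobLeaseClient(source_blob).break_lease()

# Server-side copy to the destination container; poll until it completes.
dest_blob.start_copy_from_url(source_blob.url)
while dest_blob.get_blob_properties().copy.status == "pending":
    time.sleep(1)

# Delete the source blob to complete the "move".
source_blob.delete_blob()
If you already hold the lease ID, you can skip the break and pass lease=<lease id> to delete_blob instead.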
I'm trying to use the sample provided by Microsoft to connect to an Azure Storage table using Python. The code below fails because tablestorageaccount is not found. What am I missing? I installed the azure package, but it still complains that the module is not found.
import azure.common
from azure.storage import CloudStorageAccount
from tablestorageaccount import TableStorageAccount

print('Azure Table Storage samples for Python')

# Create the storage account object and specify its credentials
# to either point to the local Emulator or your Azure subscription
if IS_EMULATED:
    account = TableStorageAccount(is_emulated=True)
else:
    account_connection_string = STORAGE_CONNECTION_STRING
    # Split into key=value pairs removing empties, then split the pairs into a dict
    config = dict(s.split('=', 1) for s in account_connection_string.split(';') if s)

    # Authentication
    account_name = config.get('AccountName')
    account_key = config.get('AccountKey')

    # Basic URL Configuration
    endpoint_suffix = config.get('EndpointSuffix')
    if endpoint_suffix is None:
        table_endpoint = config.get('TableEndpoint')
        table_prefix = '.table.'
        start_index = table_endpoint.find(table_prefix)
        end_index = table_endpoint.endswith(':') and len(table_endpoint) or table_endpoint.rfind(':')
        endpoint_suffix = table_endpoint[start_index + len(table_prefix):end_index]

    account = TableStorageAccount(account_name=account_name, connection_string=account_connection_string, endpoint_suffix=endpoint_suffix)
I found the source sample code, and in that sample tablestorageaccount.py is still a custom module; it is just used to return a TableService. If you already have the storage connection string and want to do a quick test, you can connect to the table directly.
Sample:
from azure.storage.table import TableService, Entity
account_connection_string = 'DefaultEndpointsProtocol=https;AccountName=account name;AccountKey=account key;EndpointSuffix=core.windows.net'
tableservice=TableService(connection_string=account_connection_string)
You could also use the newer SDK to connect to the table. Here is the official tutorial: Get started with Azure Table storage.
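If you go with the newer SDK, a minimal sketch using the azure-data-tables package looks like this (the table name is illustrative):
from azure.data.tables import TableServiceClient

account_connection_string = '<storage connection string>'

service = TableServiceClient.from_connection_string(account_connection_string)
table_client = service.get_table_client('mytable')  # illustrative table name

# List a few entities to confirm the connection works
for entity in table_client.list_entities():
    print(entity)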
I am currently looking for a way to upload a video to Azure Media Services (AMS v3) via the Python SDKs. I have followed its instructions and am able to connect to AMS successfully.
Example
credentials = AdalAuthentication(
    context.acquire_token_with_client_credentials,
    RESOURCE,
    CLIENT,
    KEY)

client = AzureMediaServices(credentials, SUBSCRIPTION_ID)  # Successful
I can also successfully get the details of all the videos uploaded via its portal:
for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0):
    print(f'Asset_name: {data.name}, file_name: {data.description}')

# Asset_name: 4f904060-d15c-4880-8c5a-xxxxxxxx, file_name: 夢想全紀錄.mp4
# Asset_name: 8f2e5e36-d043-4182-9634-xxxxxxxx, file_name: an552Qb_460svvp9.webm
# Asset_name: aef495c1-a3dd-49bb-8e3e-xxxxxxxx, file_name: world_war_2.webm
# Asset_name: b53d8152-6ecd-41a2-a59e-xxxxxxxx, file_name: an552Qb_460svvp9.webm - Media Encoder Standard encoded
However, when I tried to use the following method, it failed, since I have no idea what to pass as parameters (link to the Python SDKs):
create_or_update(resource_group_name, account_name, asset_name,
                 parameters, custom_headers=None, raw=False, **operation_config)
Therefore, I would like to ask questions as follows (everything is done via Python SDKs):
What kind of parameters does it expect?
Can a video be uploaded directly to AMS, or should it be uploaded to Blob Storage first?
Should an Asset contain only one video, or are multiple files fine?
The documentation for the REST version of that method is at https://learn.microsoft.com/en-us/rest/api/media/assets/createorupdate. This is effectively the same as the Python parameters.
Videos are stored in Azure Storage for Media Services. This is true for input assets, the assets that are encoded, and any streamed content. It all is in Storage but accessed by Media Services. You do need to create an asset in Media Services which creates the Storage container. Once the Storage container exists you upload via the Storage APIs to that Media Services created container.
Technically multiple files are fine, but doing so raises a number of issues you may not expect. I'd recommend using 1 input video = 1 Media Services asset. On the encoding output side there will be more than one file in the asset. Encoding output contains one or more videos, manifests, and metadata files.
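To make the parameters concrete, here is a hedged sketch using the typed Asset model from azure.mgmt.media.models (client is the AzureMediaServices client from the question; the resource group, account, and asset names are illustrative):
from azure.mgmt.media.models import Asset

asset = client.assets.create_or_update(
    resource_group_name='my-resource-group',   # illustrative
    account_name='my-ams-account',             # illustrative
    asset_name='asset-myvideo',
    parameters=Asset(description='My input video')
)

# The asset's container is the Storage container you upload the video into
print(asset.container)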
I have found a way to work around this using the Python SDKs and REST; however, I am not quite sure it is the proper approach.
Log-In to Azure Media Services and Blob Storage via Python packages
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.mgmt.media.models import MediaService
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
Create Assets for the original file and the encoded one by passing these parameters. Example of the original file's Asset creation:
asset_name = 'asset-myvideo'
asset_properties = {
    'properties': {
        'description': 'Original File Description',
        'storageAccountName': "storage-account-name"
    }
}

client.assets.create_or_update(RESOUCE_GROUP_NAME, ACCOUNT_NAME, asset_name, asset_properties)
Upload a video to the Blob Storage container derived from the created original Asset:
current_container = [data.container for data in client.assets.list(RESOUCE_GROUP_NAME, ACCOUNT_NAME).get(0) if data.name == asset_name][0]  # Get Blob Storage location

file_name = "myvideo.mp4"
blob_client = blob_service_client.get_blob_client(container=current_container, blob=file_name)
with open('original_video.mp4', 'rb') as data:
    blob_client.upload_blob(data)
print(f'Video uploaded to {current_container}')
And after that, I do Transform, Job, and Streaming Locator to get the video Streaming Link successfully.
I was able to get this to work with the newer Python SDK. The Python documentation is mostly missing, so I pieced this together mainly from the Python SDK source code and the C# examples.
azure-storage-blob==12.3.1
azure-mgmt-media==2.1.0
azure-mgmt-resource==9.0.0
adal~=1.2.2
msrestazure~=0.6.3
0) Import a lot of stuff
from azure.mgmt.media.models import (Asset, Transform, Job,
    BuiltInStandardEncoderPreset, TransformOutput,
    JobInputAsset, JobOutputAsset, AssetContainerSas, AssetContainerPermission,
    StreamingLocator, StreamingEndpoint)
import adal
from msrestazure.azure_active_directory import AdalAuthentication
from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
from azure.mgmt.media import AzureMediaServices
from azure.storage.blob import BlobServiceClient, ContainerClient
import datetime as dt
import time

LOGIN_ENDPOINT = AZURE_PUBLIC_CLOUD.endpoints.active_directory
RESOURCE = AZURE_PUBLIC_CLOUD.endpoints.active_directory_resource_id
# AzureSettings is a custom NamedTuple
1) Log in to AMS:
def get_ams_client(settings: AzureSettings) -> AzureMediaServices:
    context = adal.AuthenticationContext(LOGIN_ENDPOINT + '/' +
                                         settings.AZURE_MEDIA_TENANT_ID)
    credentials = AdalAuthentication(
        context.acquire_token_with_client_credentials,
        RESOURCE,
        settings.AZURE_MEDIA_CLIENT_ID,
        settings.AZURE_MEDIA_SECRET
    )
    return AzureMediaServices(credentials, settings.AZURE_SUBSCRIPTION_ID)
2) Create an input and an output asset
input_asset = create_or_update_asset(
    input_asset_name, "My Input Asset", client, azure_settings)
output_asset = create_or_update_asset(
    output_asset_name, "My Output Asset", client, azure_settings)
3) Get the container name. (Most documentation refers to BlockBlobService, which seems to have been removed from the SDK.)
def get_container_name(client: AzureMediaServices, asset_name: str, settings: AzureSettings):
    expiry_time = dt.datetime.now(dt.timezone.utc) + dt.timedelta(hours=4)
    container_list: AssetContainerSas = client.assets.list_container_sas(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        asset_name=asset_name,
        permissions=AssetContainerPermission.read_write,
        expiry_time=expiry_time
    )
    sas_uri: str = container_list.asset_container_sas_urls[0]
    container_client: ContainerClient = ContainerClient.from_container_url(sas_uri)
    return container_client.container_name
4) Upload a file to the input asset container:
def upload_file_to_asset_container(
        container: str, local_file, uploaded_file_name, settings: AzureSettings):
    blob_service_client = BlobServiceClient.from_connection_string(settings.AZURE_MEDIA_STORAGE_CONNECTION_STRING)
    blob_client = blob_service_client.get_blob_client(container=container, blob=uploaded_file_name)
    with open(local_file, 'rb') as data:
        blob_client.upload_blob(data)
5) Create a transform (in my case, using the adaptive streaming preset):
def get_or_create_transform(
        client: AzureMediaServices,
        transform_name: str,
        settings: AzureSettings):
    transform_output = TransformOutput(preset=BuiltInStandardEncoderPreset(preset_name="AdaptiveStreaming"))
    transform: Transform = client.transforms.create_or_update(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        transform_name=transform_name,
        outputs=[transform_output]
    )
    return transform
6) Submit the job:
def submit_job(
        client: AzureMediaServices,
        settings: AzureSettings,
        input_asset: Asset,
        output_asset: Asset,
        transform_name: str,
        correlation_data: dict) -> Job:
    job_input = JobInputAsset(asset_name=input_asset.name)
    job_outputs = [JobOutputAsset(asset_name=output_asset.name)]
    job: Job = client.jobs.create(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        job_name=f"test_job_{UNIQUENESS}",
        transform_name=transform_name,
        parameters=Job(input=job_input,
                       outputs=job_outputs,
                       correlation_data=correlation_data)
    )
    return job
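If you are not wiring up Event Grid, a simple polling sketch for the job state (same client and settings as above; the helper name is my own) could look like this:
def wait_for_job(client: AzureMediaServices, settings: AzureSettings,
                 transform_name: str, job_name: str) -> Job:
    # Poll the job until it reaches a terminal state
    while True:
        job = client.jobs.get(
            resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
            account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
            transform_name=transform_name,
            job_name=job_name
        )
        if job.state in ("Finished", "Error", "Canceled"):
            return job
        time.sleep(10)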
7) Then I get the URLs after Event Grid has told me the job is done:
# side-effect warning: this starts the streaming endpoint $$$
def get_urls(client: AzureMediaServices, output_asset_name: str,
             locator_name: str, settings: AzureSettings):
    try:
        locator: StreamingLocator = client.streaming_locators.create(
            resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
            account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
            streaming_locator_name=locator_name,
            parameters=StreamingLocator(
                asset_name=output_asset_name,
                streaming_policy_name="Predefined_ClearStreamingOnly"
            )
        )
    except Exception as ex:
        print("ignoring existing")

    streaming_endpoint: StreamingEndpoint = client.streaming_endpoints.get(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_endpoint_name="default")

    if streaming_endpoint:
        if streaming_endpoint.resource_state != "Running":
            client.streaming_endpoints.start(
                resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
                account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
                streaming_endpoint_name="default"
            )

    paths = client.streaming_locators.list_paths(
        resource_group_name=settings.AZURE_MEDIA_RESOURCE_GROUP_NAME,
        account_name=settings.AZURE_MEDIA_ACCOUNT_NAME,
        streaming_locator_name=locator_name
    )
    return [f"https://{streaming_endpoint.host_name}{path.paths[0]}" for path in paths.streaming_paths]
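Roughly, the helpers above get wired together like this (a sketch only; create_or_update_asset is the unshown helper around client.assets.create_or_update, and all of the asset, transform, and file names are illustrative):
client = get_ams_client(azure_settings)

input_asset = create_or_update_asset('my-input-asset', 'My Input Asset', client, azure_settings)
output_asset = create_or_update_asset('my-output-asset', 'My Output Asset', client, azure_settings)

# Upload the source video into the input asset's container
container = get_container_name(client, input_asset.name, azure_settings)
upload_file_to_asset_container(container, 'original_video.mp4', 'original_video.mp4', azure_settings)

# Encode, then fetch streaming URLs once the job has finished
get_or_create_transform(client, 'my-transform', azure_settings)
job = submit_job(client, azure_settings, input_asset, output_asset, 'my-transform', {'source': 'example'})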
I'm trying to grab a blob storage file into my python code on databricks only if it exists. How can I check if it exists through pyspark?
I don't think there is a built-in method to check whether a blob exists, but with the code below you can read it before you write it.
On the application level, first of all as always in spark applications, you need to grab a spark session:
session = SparkSession.builder.getOrCreate()
Then you need to set up an account key:
session.conf.set(
    "fs.azure.account.key.<storage-account-name>.blob.core.windows.net",
    "<your-storage-account-access-key>"
)
OR SAS token for a container:
session.conf.set(
    "fs.azure.sas.<container-name>.blob.core.windows.net",
    "<sas-token>"
)
Once an account access key or a SAS is set up you're ready to read/write to Azure blob:
sdf = session.read.parquet(
    "wasbs://<container-name>@<storage-account-name>.blob.core.windows.net/<prefix>"
)
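One way to express the "read it before you write it" check directly in PySpark is to attempt the read and treat AnalysisException as "not found" (a sketch; blob_path_exists is a hypothetical helper):
from pyspark.sql.utils import AnalysisException

def blob_path_exists(session, path):
    # Spark raises AnalysisException if the path does not exist
    try:
        session.read.parquet(path)
        return True
    except AnalysisException:
        return False

path = "wasbs://<container-name>@<storage-account-name>.blob.core.windows.net/<prefix>"
if blob_path_exists(session, path):
    sdf = session.read.parquet(path)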
Alternatively, using the Python storage SDK you can easily check whether a blob exists with the service's exists method, as in this sample:
def blob_exists(self):
    container_name = self._create_container()
    blob_name = self._get_blob_reference()

    # Basic
    exists = self.service.exists(container_name, blob_name)  # False
    self.service.create_blob_from_text(container_name, blob_name, u'hello world')
    exists = self.service.exists(container_name, blob_name)  # True

    self.service.delete_container(container_name)
You can find the reference here:
https://github.com/Azure/azure-storage-python/blob/master/samples/blob/block_blob_usage.py
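With the current azure-storage-blob v12 package (rather than the legacy SDK used in that sample), the check is a single call; a minimal sketch:
from azure.storage.blob import BlobClient

blob = BlobClient.from_connection_string(
    conn_str="<storage connection string>",
    container_name="<container-name>",
    blob_name="<blob-name>"
)
print(blob.exists())  # True if the blob is there, False otherwise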
I'm using azure-sdk-for-python to create and delete VMs.
https://github.com/Azure/azure-sdk-for-python
http://azure-sdk-for-python.readthedocs.io/en/latest/
I've successfully managed to write the code to create and delete my VMs using the resource manager approach (not the classic).
The basics of creating a VM can be seen here:
http://azure-sdk-for-python.readthedocs.io/en/latest/resourcemanagementcomputenetwork.html
I'm not worried about deleting the resource group and the storage account, as I'm using the same for all my VMs.
To delete the created VM I have something like this:
# 1. Delete the virtual machine
result = compute_client.virtual_machines.delete(
    group_name,
    vm_name
)
result.wait()

# 2. Delete the network interface
result = network_client.network_interfaces.delete(
    group_name,
    network_interface_name
)
result.wait()

# 3. Delete the ip
result = network_client.public_ip_addresses.delete(
    group_name,
    public_ip_address_name
)
result.wait()
As some of you know, the data disks are not deleted along with the VM.
I know it can be done with the Azure CLI:
https://azure.microsoft.com/en-us/documentation/articles/storage-azure-cli/
azure storage blob delete -a <storage_account_name> -k <storage_account_key> -q vhds <data_disk>.vhd
But I don't know how to do it programmatically with azure-sdk-for-python, and I don't want to depend on the Azure CLI since the rest of my code uses the Python SDK.
I would appreciate some help on how to do it.
Thanks
You can leverage the Azure ComputeManagementClient's disks APIs to obtain the list of disks associated with a VM and then iterate over them to delete the disks. Here's some sample code to achieve that:
def delete_vm(self, vm_name, nic_name, group_name):
    # Delete VM
    print('Delete VM {}'.format(vm_name))
    try:
        async_vm_delete = self.compute_client.virtual_machines.delete(group_name, vm_name)
        async_vm_delete.wait()
        net_del_poller = self.network_client.network_interfaces.delete(group_name, nic_name)
        net_del_poller.wait()

        # Queue a delete for every managed disk whose name contains the VM name
        disks_list = self.compute_client.disks.list_by_resource_group(group_name)
        disk_handle_list = []
        for disk in disks_list:
            if vm_name in disk.name:
                async_disk_delete = self.compute_client.disks.delete(group_name, disk.name)
                disk_handle_list.append(async_disk_delete)

        print("Queued disks will be deleted now...")
        for async_disk_delete in disk_handle_list:
            async_disk_delete.wait()
    except CloudError:
        print('A VM delete operation failed: {}'.format(traceback.format_exc()))
        return False
    print("Deleted VM {}".format(vm_name))
    return True
You can use the Storage Management SDK to get the storage_account_key without writing it explicitly:
http://azure-sdk-for-python.readthedocs.io/en/latest/resourcemanagementstorage.html#get-storage-account-keys
To delete a VHD inside a storage account, you have to use the Storage Data SDK here:
https://github.com/Azure/azure-storage-python
You have samples in the "samples" folder or here:
https://github.com/Azure-Samples/storage-python-getting-started
Hope it helps :)
Here's a little more code:
storage_account = <name of storage account>
storage_client = StorageManagementClient(...)
keys = storage_client.storage_accounts.list_keys(...)
for key in keys.keys:
# Use the first key; adjust accordingly if your set up is different
break
block_blob_service = BlockBlobService(
account_name=storage_account, account_key=key.value)
for blob in block_blob_service.list_blobs(container):
print blob.name
I hope you find this useful. Thanks to Laurent for the pointers.
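To actually remove the data disk blob rather than just list it, the same BlockBlobService can delete it; a sketch assuming the VHDs live in the 'vhds' container and include the VM name:
container = 'vhds'  # assumption: the default container for unmanaged disks
for blob in block_blob_service.list_blobs(container):
    if vm_name in blob.name and blob.name.endswith('.vhd'):
        block_blob_service.delete_blob(container, blob.name)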
To delete the OS disk, one simple way is to query for the OS disk name before deleting the VM, and then delete the OS disk after the VM is deleted.
Here is my version of a function that deletes the VM alongside network and storage resources:
def az_delete_vm(resource_group_name, vm_name, delete_os_storage=True, remove_default_network=True):
    os_disk_name = None
    if delete_os_storage:
        vm = compute_client.virtual_machines.get(resource_group_name, vm_name)
        os_disk_name = vm.storage_profile.os_disk.name

    logger.info("Deleting VM %s", vm_name)
    delete_op1 = compute_client.virtual_machines.delete(
        resource_group_name, vm_name)
    delete_op1.wait()

    if delete_os_storage:
        delete_op2 = compute_client.disks.delete(resource_group_name, os_disk_name)
        delete_op2.wait()

    if remove_default_network:
        logger.info("Removing VM network components")
        vnet_name = "{}-vnet".format(vm_name)
        subnet_name = "{}-subnet".format(vm_name)
        nic_name = "{}-nic".format(vm_name)
        public_ip_name = "{}-public-ip".format(vm_name)

        logger.debug("Removing NIC %s", nic_name)
        delete_op3 = network_client.network_interfaces.delete(resource_group_name, nic_name)
        delete_op3.wait()

        # logger.debug("Removing subnet %s", subnet_name)
        # network_client.subnets.delete(resource_group_name, subnet_name)

        logger.debug("Removing vnet %s", vnet_name)
        delete_op4 = network_client.virtual_networks.delete(resource_group_name, vnet_name)

        logger.debug("Removing public IP %s", public_ip_name)
        delete_op5 = network_client.public_ip_addresses.delete(resource_group_name, public_ip_name)

        delete_op4.wait()
        delete_op5.wait()

    logger.info("Done deleting VM")