Read text file from Firebase Storage in Python

I am trying to read a file from Firebase Storage under a sub-folder called "Transcripts". Reading a text file in the root folder works perfectly, but reading any text file under the "Transcripts" sub-folder fails.
Here is the structure of my Firebase Storage bucket:
Transcripts/
    Audio 1.txt
    Audio 2.txt
Audio 1.amr
Audio 2.amr
Audio Name.txt
Here is the Python code where I try to read the file in the root folder:
import urllib.request
import pyrebase

config = {
    "apiKey": "XXXXXXXX",
    "authDomain": "XXXXXXXX.firebaseapp.com",
    "databaseURL": "https://XXXXXXXX.firebaseio.com",
    "projectId": "XXXXXXXX",
    "storageBucket": "XXXXXXXX.appspot.com",
    "messagingSenderId": "XXXXXXXX",
    "appId": "XXXXXXXX",
    "measurementId": "XXXXXXXX",
    "serviceAccount": "/Users/faizarahman/Desktop/MY-PROJECT.json"
}

firebase = pyrebase.initialize_app(config)  # initializing firebase
storage = firebase.storage()  # getting storage reference 1
storage2 = firebase.storage()  # getting storage reference 2 (to avoid overwriting storage reference 1)

url = storage.child("Audio Name").get_url(None)  # getting the url from storage
print(url)  # printing the url
text_file = urllib.request.urlopen(url).read()  # reading the text file

name_list = storage.child("Transcripts/").list_files()  # getting the list of files inside the Transcripts folder
folder_name = "Transcripts/ "

for file in name_list:  # iterating through all the files in the list
    try:
        if folder_name in file.name:  # check if the path contains "Transcripts"
            transcript_name = file.name.replace("Transcripts/ ", "")  # extract the file name from "Transcripts/ Audio Number"
            unicode_text = text_file.decode("utf-8")  # converting the content of the Audio Name file to a string
            if transcript_name == unicode_text:  # if the content of the Audio Name file (a file name) matches the file name, read that file
                text_file1 = storage2.child("Audio Name").get_url(None)  # for testing purposes, "Audio Name" works here...
                print(text_file1)
    except:
        print('Download Failed')
The link that it gives me looks like this:
https://firebasestorage.googleapis.com/v0/b/MY-PROJECT-ID.appspot.com/o/Audio%20Name?alt=media
Here is what I get when I click the link:
Reading Audio Name text file successful.
Here is the Python code where I try to read the file in the "Transcripts" folder:
import urllib.request
import pyrebase

config = {
    "apiKey": "XXXXXXXX",
    "authDomain": "XXXXXXXX.firebaseapp.com",
    "databaseURL": "https://XXXXXXXX.firebaseio.com",
    "projectId": "XXXXXXXX",
    "storageBucket": "XXXXXXXX.appspot.com",
    "messagingSenderId": "XXXXXXXX",
    "appId": "XXXXXXXX",
    "measurementId": "XXXXXXXX",
    "serviceAccount": "/Users/faizarahman/Desktop/MY-PROJECT.json"
}

firebase = pyrebase.initialize_app(config)  # initializing firebase
storage = firebase.storage()  # getting storage reference 1
storage2 = firebase.storage()  # getting storage reference 2 (to avoid overwriting storage reference 1)

url = storage.child("Audio Name").get_url(None)  # getting the url from storage
print(url)  # printing the url
text_file = urllib.request.urlopen(url).read()  # reading the text file

name_list = storage.child("Transcripts/").list_files()  # getting the list of files inside the Transcripts folder
folder_name = "Transcripts/ "

for file in name_list:  # iterating through all the files in the list
    try:
        if folder_name in file.name:  # check if the path contains "Transcripts"
            transcript_name = file.name.replace("Transcripts/ ", "")  # extract the file name from "Transcripts/ Audio Number"
            unicode_text = text_file.decode("utf-8")  # converting the content of the Audio Name file to a string
            if transcript_name == unicode_text:  # if the content of the Audio Name file (a file name) matches the file name, read that file
                text_file1 = storage2.child("Transcripts/" + unicode_text).get_url(None)  # "Audio Name" works here, but reading a file under Transcripts does not...
                print(text_file1)
    except:
        print('Download Failed')
The link that it gives me looks like this:
https://firebasestorage.googleapis.com/v0/b/MY-PROJECT-ID.appspot.com/o/Transcripts%2FAudio%202?alt=media
Here is what I get when I try to read a file inside the "Transcripts" sub-folder:
Reading Audio 2 under transcript sub folder failed.
I believe the error is in this line:
text_file1 = storage2.child("Transcripts/" + unicode_text).get_url(None)
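A couple of things stand out when comparing the two URLs. The bucket listing shows that the files under Transcripts end in ".txt", yet the failing URL requests "Transcripts%2FAudio%202" with no extension; text read from a file also often carries a trailing newline, which would survive into the request path. A minimal debugging sketch along those lines (not a confirmed fix): strip the decoded text and build the URL from the exact object name that list_files() returned, so the request can only target an object that actually exists.

unicode_text = text_file.decode("utf-8").strip()  # drop any trailing newline/spaces

for file in storage.child("Transcripts/").list_files():
    if file.name.startswith("Transcripts/"):
        transcript_name = file.name[len("Transcripts/"):].strip()
        # match with or without the ".txt" extension shown in the listing
        if transcript_name in (unicode_text, unicode_text + ".txt"):
            url = storage2.child(file.name).get_url(None)  # file.name is the exact object path
            print(url)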

Related

Pyrebase .download() not getting files from Firebase Storage

I am attempting to download some image files from Firebase Storage via the Pyrebase .download() function, but I am having trouble actually getting the files. It apparently finds the file in the bucket fine, as indicated by print(file.name), but when I run the line that should download the file, it returns None and no files appear in the specified folder. There are no errors as far as I can spot, and the code exits with exit code 0.
As suggested by other solutions found, I have already tried adding 'serviceAccount' into the config when using pyrebase.initialize_app(config), and adding 'filename' and 'path' to the .download() function.
import pyrebase

config = {
    'apiKey': "...",
    'authDomain': "...",
    'projectId': "...",
    'storageBucket': "...",
    'messagingSenderId': "...",
    'appId': "...",
    'measurementId': "...",
    'databaseURL': "",
    'serviceAccount': "C:/Users/Dell/PycharmProjects/FYP/creds.json"
}

firebase = pyrebase.initialize_app(config)
storage = firebase.storage()

path_local = "C:/Users/Dell/PycharmProjects/FYP/identDatabase/"
all_files = storage.child("C:/Users/Dell/PycharmProjects/FYP/identDatabase").list_files()

for file in all_files:
    print(file.name)
    storage.child(file.name).download(filename=file.name.split('/')[6], path=path_local)

# example of full path inside Firebase Storage
# C:/Users/Dell/PycharmProjects/FYP/identDatabase/William Engel.jpg
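When a serviceAccount is configured, Pyrebase's list_files() wraps the underlying google-cloud-storage bucket listing, so each returned object is a Blob that can be downloaded directly, without going back through .child().download(). A hedged sketch along those lines (assuming the objects of interest live under the "identDatabase" prefix shown in the example path):

import os

for blob in all_files:
    # blob is a google.cloud.storage Blob; its .name is the full object path
    if "identDatabase/" in blob.name and not blob.name.endswith("/"):
        local_path = os.path.join(path_local, os.path.basename(blob.name))
        blob.download_to_filename(local_path)  # writes the object's bytes to disk
        print("downloaded", blob.name, "->", local_path)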

Consume Events from Azure EventHub using the sequence number in python

I have published several files to the Event Hub, and for another purpose I want to download one specific file from it.
I have the file name as well as the sequence number.
I used this method:
await client.receive(on_event=on_event, starting_position="12856854")
And this downloads all the files from position 12856854.
But I want to download only one specific file.
For example, I have published sample_data.xml and its sequence number is 567890.
What I need is to download just the sample_data.xml file from the Event Hub.
In the line you mentioned, starting_position tells the client where in the partition to begin reading; receiving always starts from that point and streams onward. For example, to start from the very beginning of the partition:
await client.receive(
    on_event=on_event,
    starting_position="-1",
)
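Because receive() streams everything from starting_position onward, one way to grab a single event is to start at the known sequence number and stop once it has been handled. A minimal sketch with azure-eventhub (the connection string, hub name, and the 15-second receive window are assumptions):

import asyncio
from azure.eventhub.aio import EventHubConsumerClient

CONNECTION_STR = "<EVENT HUB CONNECTION STRING>"  # placeholder
EVENTHUB_NAME = "<EVENT HUB NAME>"                # placeholder
TARGET_SEQ = 567890                               # sequence number of sample_data.xml

client = EventHubConsumerClient.from_connection_string(
    CONNECTION_STR, consumer_group="$Default", eventhub_name=EVENTHUB_NAME
)

async def on_event(partition_context, event):
    # only persist the one event we came for
    if event.sequence_number == TARGET_SEQ:
        with open("sample_data.xml", "w") as f:
            f.write(event.body_as_str(encoding="UTF-8"))

async def main():
    recv_task = asyncio.ensure_future(
        client.receive(
            on_event=on_event,
            starting_position=TARGET_SEQ,      # begin at the known sequence number
            starting_position_inclusive=True,  # include that event itself
        )
    )
    await asyncio.sleep(15)  # receive for a bounded window, then stop
    recv_task.cancel()
    await client.close()

asyncio.run(main())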
The script below reads the captured data files from your Azure Storage account and generates CSV files that are easy to open and view.
import os
import string
import json
import uuid
import avro.schema
from azure.storage.blob import ContainerClient, BlobClient
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

def processBlob2(filename):
    reader = DataFileReader(open(filename, 'rb'), DatumReader())
    dict = {}
    for reading in reader:
        parsed_json = json.loads(reading["Body"])
        if not 'id' in parsed_json:
            return
        if not parsed_json['id'] in dict:
            list = []
            dict[parsed_json['id']] = list
        else:
            list = dict[parsed_json['id']]
        list.append(parsed_json)
    reader.close()
    for device in dict.keys():
        filename = os.getcwd() + '\\' + str(device) + '.csv'
        deviceFile = open(filename, "a")
        for r in dict[device]:
            deviceFile.write(", ".join([str(r[x]) for x in r.keys()]) + '\n')

def startProcessing():
    print('Processor started using path: ' + os.getcwd())
    # Create a blob container client.
    container = ContainerClient.from_connection_string(
        "AZURE STORAGE CONNECTION STRING", container_name="BLOB CONTAINER NAME")
    blob_list = container.list_blobs()  # List all the blobs in the container.
    for blob in blob_list:
        # Content_length == 508 is an empty file, so process only content_length > 508 (skip empty files).
        if blob.size > 508:
            print('Downloaded a non empty blob: ' + blob.name)
            # Create a blob client for the blob.
            blob_client = container.get_blob_client(blob=blob.name)
            # Construct a file name based on the blob name.
            cleanName = str.replace(blob.name, '/', '_')
            cleanName = os.getcwd() + '\\' + cleanName
            with open(cleanName, "wb+") as my_file:  # Open the file to write. Create it if it doesn't exist.
                my_file.write(blob_client.download_blob().readall())  # Write blob contents into the file.
            processBlob2(cleanName)  # Convert the file into a CSV file.
            os.remove(cleanName)  # Remove the original downloaded file.
            # Delete the blob from the container after it's read.
            container.delete_blob(blob.name)

startProcessing()
Refer to the MS Docs for the process and more information.

Google Translate API - Reading and Writing to Cloud Storage - Python

I'm using the Google Translation API to translate a csv file with multiple columns and rows. The target language is English and the file has text in multiple languages.
The code posted below uses local files for testing, but I'd like to import the file from a cloud storage bucket and export the translated file to a different cloud storage bucket.
I've tried to run the script below with my sample file and got this error message: "FileNotFoundError: [Errno 2] No such file or directory"
I stumbled upon this link for "Reading and Writing to Cloud Storage" but I was not able to implement the suggested solution into the script below. https://cloud.google.com/appengine/docs/standard/python/googlecloudstorageclient/read-write-to-cloud-storage#reading_from_cloud_storage
May I ask for a suggested modification of the script to import (and translate) the file from a Google Cloud bucket and export the translated file to a different Google Cloud bucket? Thank you!
Script mentioned:
from google.cloud import translate
import csv

def listToString(s):
    """Transform list to string"""
    str1 = " "
    return (str1.join(s))

def detect_language(project_id, content):
    """Detecting the language of a text string."""
    client = translate.TranslationServiceClient()
    location = "global"
    parent = f"projects/{project_id}/locations/{location}"
    response = client.detect_language(
        content=content,
        parent=parent,
        mime_type="text/plain",  # mime types: text/plain, text/html
    )
    for language in response.languages:
        return language.language_code

def translate_text(text, project_id, source_lang):
    """Translating Text."""
    client = translate.TranslationServiceClient()
    location = "global"
    parent = f"projects/{project_id}/locations/{location}"
    # Detail on supported types can be found here:
    # https://cloud.google.com/translate/docs/supported-formats
    response = client.translate_text(
        request={
            "parent": parent,
            "contents": [text],
            "mime_type": "text/plain",  # mime types: text/plain, text/html
            "source_language_code": source_lang,
            "target_language_code": "en-US",
        }
    )
    # Display the translation for each input text provided
    for translation in response.translations:
        print("Translated text: {}".format(translation.translated_text))

def main():
    project_id = "your-project-id"
    csv_files = ["sample1.csv", "sample2.csv"]
    # Perform your content extraction here if you have a different file format #
    for csv_file in csv_files:
        csv_file = open(csv_file)
        read_csv = csv.reader(csv_file)
        content_csv = []
        for row in read_csv:
            content_csv.extend(row)
        content = listToString(content_csv)  # convert list to string
        detect = detect_language(project_id=project_id, content=content)
        translate_text(text=content, project_id=project_id, source_lang=detect)

if __name__ == "__main__":
    main()
You could download the file from GCS, run your logic against the local (downloaded) file, and then upload the result to another GCS bucket. Example:
Download a file from "my-bucket" to /tmp:
from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket("my-bucket")
source_blob = bucket.blob("blob/path/file.csv")
new_file = "/tmp/file.csv"
source_blob.download_to_filename(new_file)  # writes the object to /tmp/file.csv
After translating/running your code logic, upload to a bucket:
bucket = client.get_bucket('my-other-bucket')
blob = bucket.blob('myfile.csv')
blob.upload_from_filename('myfile.csv')
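If you'd rather skip the local temp file, the blob can be read and written in memory as well. A hedged sketch (bucket and object names here are placeholders, and the translation step is elided):

from google.cloud import storage

client = storage.Client()
source_bucket = client.get_bucket("source-bucket")  # placeholder name
target_bucket = client.get_bucket("target-bucket")  # placeholder name

csv_text = source_bucket.blob("sample1.csv").download_as_text()
translated_text = csv_text  # run detect_language()/translate_text() over csv_text here

target_bucket.blob("sample1_translated.csv").upload_from_string(
    translated_text, content_type="text/csv"
)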

Excel file upload using Tornado

I am writing a file upload using Tornado. My API will receive only one Excel file and save it in a folder. However, right now I keep getting a corrupted-file error.
Here's the basic code of the API
def post(self):
    user = {
        "name": "Test"
    }
    if user:
        if "test_file" not in self.request.files:
            return self.write(json_util.dumps({
                "message": "Required excel file missing.",
            }))
        bulk_excel = self.request.files['test_file'][0]
        original_fname = bulk_excel['filename']
        extension = os.path.splitext(original_fname)[1]
        fname = ''.join(random.choice(
            string.ascii_lowercase + string.digits) for _ in range(12))
        final_filename = fname + extension
        output_file = open(final_filename, 'w')
        output_file.write(bulk_excel['body'])
I have also tried open(final_filename, 'wb'), but it did not work as intended.
Thanks in advance.
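The uploaded body (bulk_excel['body']) is bytes, so the file has to be written in binary mode, and it also has to be closed (or flushed) before anything reads it back; a text-mode write or an unclosed handle is a common cause of "corrupted" .xlsx files. A minimal sketch of that change (not a confirmed fix for this exact setup):

with open(final_filename, 'wb') as output_file:  # binary mode for a bytes body
    output_file.write(bulk_excel['body'])
# the context manager closes and flushes the file here, before it is opened elsewhere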

Empty file stored on Firebase with Python

My goal is to generate certain files (txt/pdf/excel) on my Python server and subsequently push them to Firebase Storage.
For the Firebase Storage integration I use the pyrebase package.
So far I have managed to generate the file locally and store it at the right path in Firebase Storage.
However, the files I store are always empty. What is the reason for this?
1. Generating the localFile
import os

def save_templocalfile(specs):
    # Random something
    localFileName = "test.txt"
    localFile = open(localFileName, "w+")
    for i in range(1000):
        localFile.write("This is line %d\r\n" % (i+1))
    return {
        'localFileName': localFileName,
        'localFile': localFile
    }
2. Storing the localFile
# Required Libraries
import pyrebase
import time

# Firebase Setup & Admin Auth
config = {
    "apiKey": "<PARAMETER>",
    "authDomain": "<PARAMETER>",
    "databaseURL": "<PARAMETER>",
    "projectId": "<PARAMETER>",
    "storageBucket": "<PARAMETER>",
    "messagingSenderId": "<PARAMETER>"
}

firebase = pyrebase.initialize_app(config)
storage = firebase.storage()

def fb_upload(localFile):
    # Define childref
    childRef = "/test/test.txt"
    storage.child(childRef).put(localFile)
    # Get the file url
    fbResponse = storage.child(childRef).get_url(None)
    return fbResponse
The problem was that the file had only been opened for writing:
localFile = open(localFileName,"w+")
The solution was to close the write handle and reopen the file for reading:
# close (Write)
localFile.close()
# Open (Read)
my_file = open(localFileName, "rb")
my_bytes = my_file.read()
# Store on FB
fbUploadObj = storage.child(storageRef).put(my_bytes)
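Since pyrebase's put() also accepts a file path (it opens the path in 'rb' mode itself), the reopen can be avoided entirely by writing inside a context manager, so the data is flushed to disk, and then uploading by name. A hedged simplification:

localFileName = "test.txt"
with open(localFileName, "w") as f:  # the context manager flushes and closes the file
    for i in range(1000):
        f.write("This is line %d\r\n" % (i + 1))

storage.child("/test/test.txt").put(localFileName)  # pass the path; pyrebase reads the bytes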
