I'm trying to use a Folder to deposit images while running a Python Script and storing the result on my Firebase Firestore and the images to the Cloud Storage.
At the moment I have my main Function which runs the storing and the getting of the Images.
An then 3 complement functions that help me with the downloading of the images, optimization (making them smaller and less quality), and the other helps me name the file.
Here the functions:
Download Images Function:
def dl_jpg(url, file_path, file_name):
full_path = file_path + file_name + '.jpg'
path = urllib.request.urlretrieve(url, full_path)
Optimize Image (make it smaller and less Quality):
def optimizeImage(name) -> str:
foo = Image.open(os.path.join('/tmp/', name + '.jpg'))
foo = foo.resize((525,394),Image.ANTIALIAS)
foo.save('/tmp/' + name + '.jpg',optimize=True,quality=50)
print('Optimized Image: ' + name)
return '/tmp/' + name + '.jpg'
Give Random Name:
def random_name() -> str:
# printing lowercase
letters = string.ascii_lowercase
return ''.join(random.choice(letters) for i in range(10))
Now on the main Function, I get the images like this:
#Images Section
imagesRaw = []
imagesSection = soup.find('div', {'class': 'src__GalleryContainer-sc-bdjcm0-7'})
imagesInfo = imagesSection.find_all('img', {'class': 'gallery-image__StyledImg-sc-jtk816-0'})
image1 = imagesInfo[0].get('src')
for image in imagesInfo:
img = image.get('data-flickity-lazyload-src')
imagesRaw.append(img)
imagesRaw.pop(0)
imagesRaw.insert(0, image1)
images = imagesRaw[:12]
imageFile = []
#Here we will store the images in local file
for image in images:
#First we change the ending from webp to jpg
newURL = image[:-4] + 'jpg'
print(newURL)
name = find_between(newURL, "_img", "/origin.jpg")
if name == "":
name = random_name()
print(name)
#Here the function to download the image
try:
dl_jpg(newURL, '/tmp/', name)
except:
break
#Here we Optimize the image to size 500 x 394 pixels
# And get the location for the new image
try:
path = optimizeImage(name)
except:
break
# We append the path to the Array of paths
imageFile.append(path)
And Finally, in the main function, I upload the images to Firebase Storage and then the array of URLs from Storage inside the new Detail in Firestore
ref = db.collection('listings').document()
photos = []
for image in listing.photos:
fullpath = image #find_between(image, 'scrapping/', '.jpg') + '.jpg'
filename = fullpath[7:]
path = fullpath[0:6]
print('FileName: ' + filename)
print('path: '+ path)
imagePath = path + '/' + filename
bucket = store.get_bucket('testxxxxxx2365963.appspot.com')
blob = bucket.blob('ListingImages/' + ref.id + '/' + filename)
blob.upload_from_filename(imagePath)
blob.make_public()
photos.append(blob.public_url)
At the moment my problem is that at the moment it is giving an additional subfolder when uploading with this error:
"[Errno 2] No such file or directory: '/tmp/h/cabujfoh.jpg'"
Any Ideas how to fix and allow the imges optimized be uploaded.
For any of you guys, tracking this:
I found the problem, it was that I was using in my local the folder:
images/
and now change to tmp which is shorter and in this lines:
filename = fullpath[7:]
path = fullpath[0:6]
I got the route information, so I notice that the full path wasn't correct so I change into this:
fullpath = image #find_between(image, 'scrapping/', '.jpg') + '.jpg' fullpath2 = fullpath[1:] filename = fullpath2.split('/',1)[1] path = '/tmp' imagePath = path + '/' + filename
Now Working
Related
I'm trynna get the train_img and ground truth img from directory './train_dataset/train_img_cropped' & './train_dataset/train_gt_cropped'. Next, I wanna save the both original image and flipped one with a '_0', '_1'tail on its name in directory './train_dataset/train_img_preprocessed' & './train_dataset/train_gt_preprocessed'. But there's an Error of changing names (file + "_0" or "_1") as an unknown file extension. Looks like somehow PIL recognizes _0, _1 as a extension. Is there anybody who can help me to save with changing the name?
import os
import os.path
import glob
from PIL import Image
def preprocess(img_path, save_path):
targetdir = img_path
files = os.listdir(targetdir)
format = [".png"]
for (path, dirs, files) in os.walk(targetdir):
for file, i in files:
if file.endswith(tuple(format)):
image = Image.open(path + "/" + file)
image.save(save_path + "/" + file)
flippedImage = image.transpose(Image.FLIP_LEFT_RIGHT)
flippedImage.save(save_path + "/" + file)
print(file + " successfully flipped!")
else:
print(path)
print("InValid", file)
if __name__ == "__main__":
train_img_cropped_path = './train_dataset/train_img_cropped'
train_img_preprocessed_path = './train_dataset/train_img_preprocessed'
train_gt_cropped_path = './train_dataset/train_gt_cropped'
train_gt_preprocessed_path = './train_dataset/train_gt_preprocessed'
preprocess(train_img_cropped_path, train_img_preprocessed_path)
preprocess(train_gt_cropped_path, train_gt_preprocessed_path)
Not sure if this answers your question, but why not save the image with a temporary name (something like a random alphanumeric string or uuid) and then use os.rename to change the name of the temp file with your desired name ending _0 or _1.
I'm in an introductory neural networking class so mind my ignorance. Also my first SO post.
I'm trying to resize some very highly resolved images within a dataset into 80x80p grayscale images in a new dataset. However, when I do this, I'd like to keep the filenames of each new image the same as the original image. The only way I know how to resave images into a new file is through a str(count) which isn't what I want. The filenames are important in creating a .csv file for my dataset later.
The only SO post I can find that is related is this:
Use original file name to save image
But the code suggested there didn't work - wasn't sure if I was going about it the wrong way.
import os
from PIL import Image
import imghdr
count=0
path1 = "/Users/..."
path2 = "/Users/..."
listing = os.listdir(path1)
for file in listing:
type = imghdr.what((path1 + file))
if type == "jpeg":
img = Image.open("/Users/..." +file).convert('LA')
img_resized = img.resize((80,80))
img_resized.save(path2 + str(count) + '.png')
count +=1
pass
pass
Reuse the original filename that you get from the for loop i.e. file
and, split it into filename and extension using os.path.splitext() like below:
import os
from PIL import Image
import imghdr
count=0
path1 = "/Users/..."
path2 = "/Users/..."
listing = os.listdir(path1)
for file in listing:
type = imghdr.what((path1 + file))
if type == "jpeg":
img = Image.open("/Users/..." +file).convert('LA')
img_resized = img.resize((80,80))
# splitting the original filename to remove extension
img_filename = os.path.splitext(file)[0]
img_resized.save(path2 + img_filename + '.png')
count +=1
pass
Another option, we can use python str's built-in split method to split the original filename by . and discard the extension.
import os
from PIL import Image
import imghdr
count=0
path1 = "/Users/..."
path2 = "/Users/..."
listing = os.listdir(path1)
for file in listing:
type = imghdr.what((path1 + file))
if type == "jpeg":
img = Image.open("/Users/..." +file).convert('LA')
img_resized = img.resize((80,80))
# splitting the original filename to remove extension
img_filename = file.split(".")[0]
img_resized.save(path2 + img_filename + '.png')
count +=1
pass
So, if an image has a name such as some_image.jpeg then, the img_filename will have a value some_image as we splitted by . and discarded .jpeg part of it.
NOTE: This option assumes the original_filename will not contain any . other than the extension.
I assume that image name is on path1. If so you can grap image name from there in this way:
x=path1.rsplit('/',1)[1]
We are splitting path1 on last slash and taking image name string via indexing.
I searched online but found nothing really helpful. I am trying to verify a file name. If that file name already exists, change the name slightly. For instance. Writing a file User.1.1.jpg. I want it to change to User.2.1.jpg if 1.1 already exists and so on.
import cv2
import os
cam = cv2.VideoCapture(0)
cam.set(3, 640)
cam.set(4, 480)
face_detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
#face_id = input('\n id: ')
print("\n [INFO] Initializing face capture. Look the camera and wait ...")
count = 1
face_id = 1
while(True):
ret, img = cam.read()
img = cv2.flip(img, 1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_detector.detectMultiScale(gray, 1.3, 5)
for (x,y,w,h) in faces:
cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
count += 1
if os.path.exists("dataset/User.%s.1.jpg" % face_id):
face_id + 1
cv2.imwrite("dataset/User." + str(face_id) + '.' + str(count) + ".jpg", gray[y:y+h,x:x+w])
cv2.imshow('image', img)
k = cv2.waitKey(100) & 0xff
if k == 27:
break
elif count >= 30:
break
print("\n [INFO] Exiting Program and cleanup stuff")
cam.release()
cv2.destroyAllWindows()
You can use a while loop instead of an if statement to keep incrementing face_id until the target file name is found to be available.
Change:
if os.path.exists("dataset/User.%s.1.jpg" % face_id):
face_id + 1
to:
while os.path.exists("dataset/User.%s.1.jpg" % face_id):
face_id += 1
Here is a function I made to add an incrementing number to the end of the existing file name. You would only need to change the string manipulation depending on your desired new file name formatting.
def uniq_file_maker(file: str) -> str:
"""Create a unique file path"""
# get file name and extension
filename, filext = os.path.splitext(os.path.basename(file))
# get file directory path
directory = os.path.dirname(file)
# get file without extension only
filexx = str(directory + os.sep + filename)
# check if file exists
if Path(file).exists():
# create incrementing variable
i = 1
# determine incremented filename
while os.path.exists(f"{filexx} ({str(i)}){filext}"):
# update the incrementing variable
i += 1
# update file name with incremented variable
filename = directory + os.sep + filename + ' (' + str(i) + ')' + filext
return filename
Additionally, here is a similar function I made that does the same thing when creating a new directory.
def uniq_dir_maker(directoryname: str) -> str:
"""Create a unique directory at destination"""
# file destination select dialogue
Tk().withdraw() # prevent root window
# open file explorer folder select window
dirspath = filedialog.askdirectory(title='Select the output file save destination')
# correct directory file path
dirsavepath = str(dirspath + os.sep + directoryname)
# try to create directory
try:
# create directory at destination without overwriting
Path(dirsavepath).mkdir(parents=True, exist_ok=False)
# if directory already exists add incremental integers until unique
except FileExistsError:
# create incrementing variable
i = 1
# determine incremented filename
while os.path.exists(f"{dirsavepath} ({str(i)})"):
i += 1
# update directory path with incremented variable
dirsavepath = dirsavepath + ' (' + str(i) + ')'
# create now unique directory
Path(dirsavepath).mkdir(parents=True, exist_ok=False)
# add os separator to new directory for saving
savepath = dirsavepath + os.sep
return savepath
If you want to use pathlib library with adding suffix of datetime then it would be like that:
from pathlib import Path
from datetime import datetime
# path and file parameters
path = Path('/home/') #path to your files
file_name = 'file.txt' # your filename here
#
filename_full = path.joinpath(file_name)
if filename_full.is_file(): #checks whether the file of this name already exists
suffix = datetime.now().strftime("%Y%m%d_%H%M%S") #creates suffix with current date and exact time
print('The file exists. Im adding datetime {suffix} to filename')
file_name1 = filename_full.stem + suffix #adds suffix to filename
filename_full = path.joinpath(file_name1).with_suffix('.txt') #create full filename with path and the final suffix
print(filename_full)
I am a newbie to python. I am trying to put my 22k images into matrix before i process them using CNN. However, I encounter this situation which I don't know where I did wrong.
path1 = 'C:/Users/Z/Documents/Python Scripts/Data'
path2 = 'C:/Users/Z/Documents/Python Scripts/Data1'
listing = os.listdir(path1)
num_samples=size(listing)
for file in listing:
im = Image.open(path1 + '\\' + file)
img_rows, img_cols = 224, 224
img = im.resize((img_rows,img_cols),3)
img.save(path2 +'\\' + file, "JPEG")
imlist = os.listdir(path2)
img_data_list=[]
a = Image.open('Data1' + '\\'+ imlist[0]) # open one image to get size
im1 = array(a)
m,n = im1.shape[0:3] # get the size of the images
imnbr = len(imlist) # get the number of images
num_samples = len(imlist)
I got this error
Your path is incorrect when you open the single image, it should be:
a = Image.open(path2 + '\\'+ imlist[0])
a = Image.open(path2 + '\\'+ imlist[0])
You just had a small code error. "Data1" that isn't a correct path
a = Image.open('Data1' + '\\'+ imlist[0]) # open one image to get size
You are supposed to read from path2. aren't you??
I just started to use Spark for the first time for a OCR task, i have a folder of PDF files containing scanned text documents and I want to convert it to plain text. I first create a parallelized dataset of all the pdf's in the folder and perform a Map operation to create the images. I use Wand images for this task. Finally with a foreach i do the OCR using pytesseract, which is a wrapper for Tesseract.
The problem I have with this approach is that the memory use is increasing with each new document and finally i get an error "os cannot allocate memory". I have the feeling it stores the complete Img object in memory but all i need is a list of the locations of the temporary files. If I run this with a few PDF files it works but more then 5 files the system crashes...
def toImage(f):
documentName = f[:-4]
def imageList(imgObject):
#get list of generated images
imagePrefix = "{}tmp/{}/{}".format(path,documentName,documentName)
if len(img.sequence) > 1:
images = [ ("{}-{}.jpg".format(imagePrefix, x.index), documentName) for x in img.sequence]
else:
images = [("{}.jpg".format(imagePrefix), documentName)]
return images
#store images for each file in tmp directory
with WandImage(filename=path + f, resolution=300) as img:
#create tmp directory
if not os.path.exists(path + "tmp/" + documentName):
os.makedirs(path + "tmp/" + documentName)
#save images in tmp directory
img.format = 'jpeg'
img.save(filename=path + "tmp/" + documentName + '/' + documentName + '.jpg')
imageL = imageList(img)
return imageL
def doOcr(imageList):
print(imageList[0][1])
content = "\n\n***NEWPAGE***\n\n".join([pytesseract.image_to_string(Image.open(fullPath), lang='nld') for fullPath, documentName in imageList])
with open(path + "/txt/" + imageList[0][1] + ".txt", "w") as text_file:
text_file.write(content)
sc = SparkContext(appName="OCR")
pdfFiles = sc.parallelize([f for f in os.listdir(sys.argv[1]) if f.endswith(".pdf")])
text = pdfFiles.map(toImage).foreach(doOCr)
Im using Ubuntu with 8gb memory Java 7 and Python3.5
Update
I found a solution, the problem appears to be in the part where I create the imagelist, using:
def imageList(imgObject):
#get list of generated images
# imagePrefix = "{}tmp/{}/{}".format(path,documentName,documentName)
# if len(img.sequence) > 1:
# images = [ ("{}-{}.jpg".format(imagePrefix, x.index), documentName) for x in img.sequence]
# else:
# images = [("{}.jpg".format(imagePrefix), documentName)]
fullPath = "{}tmp/{}/".format(path, documentName)
images = [(fullPath + f, documentName) for f in os.listdir(fullPath) if f.endswith(".jpg")]
return natsorted(images, key=lambda y: y[0])
works perfectly, but i'm not sure why.. Everything gets closed but still it remains in memory