how to save file image as original name - python

I want to save the output images using each image's original file name.
I tried the code below. It works in many cases, but roughly half of the images are saved under a different file's name. How can I do this better?
# Output directory for the processed images; created if it does not exist yet.
cropped_images = "GrabCut"
if not os.path.exists(cropped_images):
    os.makedirs(cropped_images)

# Load data
filepath = "Data"
# Every .jpg in `filepath` is decoded up front, in whatever order glob returns.
images = [cv2.imread(file) for file in glob.glob(filepath + "/*.jpg")]

# Base names (extension stripped) of every entry in `filepath`.
# NOTE: this list is built from os.listdir, not from the glob call above, so it
# may contain non-.jpg entries and need not be in the same order as `images`.
file_names = []
for filename in os.listdir(filepath):
    org_image_name = os.path.splitext(filename)[0]
    file_names.append(org_image_name)

for i, image in enumerate(images):
    # DO SOMETHING...
    # The name is looked up by position, so it only belongs to this image when
    # the two lists above happen to line up.
    img_name = file_names[i]
    cropped_images_path = os.path.join(cropped_images, img_name + '.jpg')
    cv2.imwrite(cropped_images_path, image)

The reason you get the error is that the lists made by glob and os.listdir are not the same: they contain different files (glob only gets the .jpg files, while listdir gets everything), they are in a different order, or both. Instead, you can take one list of the original file names, orig_files, and derive a corresponding list of new file names, new_files, from it.
It also looks like it makes more sense to just read one image at a time (you only use them one at a time) so I moved that into the loop. You can also use os.path.basename to get the filename, and zip to iterate through multiple lists together.
# Output directory for the processed images; exist_ok avoids a separate check.
cropped_images = "GrabCut"
os.makedirs(cropped_images, exist_ok=True)

# Load data
filepath = "Data"
# A single list drives both the input and the output names, so the two can
# never get out of step.
orig_files = glob.glob(os.path.join(filepath, "*.jpg"))
new_files = [os.path.join(cropped_images, os.path.basename(f)) for f in orig_files]

for orig_f, new_f in zip(orig_files, new_files):
    # Read one image at a time instead of holding all of them in memory.
    image = cv2.imread(orig_f)
    if image is None:
        # cv2.imread returns None for a file it cannot decode; skip it rather
        # than passing None on to cv2.imwrite.
        continue
    # DO SOMETHING...
    cv2.imwrite(new_f, image)

Related

How to get the pictures in group from excel files

I have tried to extract pictures from an Excel file. However, after I group the pictures with text or lines inserted in Excel, the extracted pictures are incomplete: the grouped text and lines are missing. What should I do?
This is the code I tried:
import os
import zipfile
import numpy as np
import win32com.client as win32
from PIL import Image
path = 'C:/Users/Peter/Desktop/test/'

# Running index used to name the extracted pictures 0.jpg, 1.jpg, ...
number = 0

# An .xlsx workbook is a zip archive, so zipfile can open it directly; the
# files do not have to be renamed to .zip and back again.
for workbook in os.listdir(path):
    if not workbook.endswith('.xlsx'):
        continue
    # The context managers close the archive and the member even on error.
    with zipfile.ZipFile(os.path.join(path, workbook)) as azip:
        for member in azip.namelist():
            # Only archive members under xl/media/ are treated as pictures.
            if not member.startswith('xl/media/'):
                continue
            img_name = os.path.join(path, str(number) + '.jpg')
            with azip.open(member) as f:
                img = Image.open(f)
                # Convert to RGB before saving as JPEG.
                img = img.convert("RGB")
                img.save(img_name, "JPEG")
            number += 1
This is my Excel file. The pictures were taken from Google for testing, and I added text and arrows to them.
And this is the pictures I get, there is no text or line in the pictures.
I know very little about Excel, so there may be a much better explanation, but it seems to me that the annotations are stored in an OpenXML file called drawing1.xml inside your XLSX archive.
I can see your two red triangles and the label 40 as annotated below - note that val="ff0000" would correspond to red.

Unable to append in order

import os
train_dir = "/Images/train/"
data = []
# Images are appended in whatever order os.listdir returns the entries; no
# sorting is applied here.
for i in os.listdir(train_dir):
    path = os.path.join(train_dir, i)
    img = cv2.imread(path)
    print(i)
    data.append(img)
My train directory has 49000 images in order img(1), img(2), ..., img(49000)
I want to append these images in this order only but they are getting appended in a different order (as shown in the image).
Any help?
I want to append them as img(1).png, img(2).png, img(3).png, and so on.
Using the sorted method helped me.
import re

data = []
train_dir = "/Images/train/"


def _image_number(filename):
    """Return the first run of digits in the file name's stem as an int.

    Works for purely numeric names ("12.png") as well as names such as
    "img(12).png", where calling int() on the whole stem would raise.

    Raises:
        ValueError: if the stem contains no digits.
    """
    stem = os.path.splitext(filename)[0]
    match = re.search(r"\d+", stem)
    if match is None:
        raise ValueError("no number in file name: %r" % (filename,))
    return int(match.group())


files = os.listdir(train_dir)
# Sort numerically so that 2 comes before 10.
files = sorted(files, key=_image_number)
for i in files:
    path = os.path.join(train_dir, i)
    img = cv2.imread(path)
    data.append(img)
So all you need is the image names in a Python list, which is called filenames in my solution. A Python list has a sort() method, which sorts the names in place. After that, filenames is in sorted order, so iterating through it yields the image names in sorted order.
import re

train_dir = "/Images/train/"


def _natural_key(name):
    """Return a sort key that orders embedded numbers by value.

    re.split with a capturing group alternates text and digit runs, so the
    odd positions are always the digit runs. Converting those to int makes
    "img(2).png" sort before "img(10).png", which a plain string sort does not.
    """
    chunks = re.split(r"(\d+)", name)
    return [int(chunk) if index % 2 else chunk
            for index, chunk in enumerate(chunks)]


filenames = os.listdir(train_dir)
filenames.sort(key=_natural_key)
data = []
for i in filenames:
    path = os.path.join(train_dir, i)
    img = cv2.imread(path)
    print(i)
    data.append(img)

Can I loop through directories and subdirectories and store certain files in an array?

I have one folder that contains many subfolders, and images within those subfolders. I have code that loops through the folders and subfolders and prints out the name of each image one at a time. I want all of these image names to be stored in a single array. How do I get my loop to append each image name to the same array?
I have only seen similar solutions on Linux or Matlab so far, but not on python.
files = []
# r = root, d = directories, f = files
for r, d, f in os.walk(path):
    for face_image in f:
        # to get all '.jpg' and all '.png' files
        if face_image.endswith("g"):
            # The name is only printed; nothing is added to `files` here.
            print(face_image)
When I run the loop above, I get all ~1000 image names printed. But when I then try to print(face_image) outside of the loop, only the name of the final image in the loop is printed. I now know this is because I have not appended each name to an array, but I am not sure how to go about this. Any help would be massively appreciated!
Using pathlib and a recursive glob pattern:
from pathlib import Path
# Extensions to collect, without the leading dot.
file_types = ("jpg", "png")
file_paths = []
for file_type in file_types:
    # The "**/" prefix makes the pattern recursive, so files in the current
    # directory and in every subdirectory are matched.
    file_paths.extend(Path(".").glob(f"**/*.{file_type}"))
# Keep only the final path component (the file name) of each match.
file_names = [file_path.name for file_path in file_paths]
After your print statement, you can use files.append(face_image) to add the face image to your list. When the loops are done, all valid image names will be in the list for you to use.
I wasn't sure if this was a legit question or not. You need to append the files to the list.
files = []
# Explicit suffixes: testing for a trailing "g" alone would also accept
# unrelated names such as "notes.log".
image_suffixes = (".jpg", ".jpeg", ".png")
# r = root, d = directories, f = files
for r, d, f in os.walk(path):
    for face_image in f:
        if face_image.endswith(image_suffixes):
            print(face_image)
            # Collect the name so it is still available after the loop.
            files.append(face_image)
You could try something like this:
files = []
for r, d, f in os.walk(path):
    # collect the full path of every file under `path`
    files += [os.path.join(r, file) for file in f]
# filter images once, after the walk; explicit suffixes avoid matching
# unrelated names that merely end in "g"
files = [ff for ff in files if ff.endswith((".jpg", ".jpeg", ".png"))]
or a little more compact:
files = []
for r, d, f in os.walk(path):
    # collect the full path of every image; explicit suffixes avoid matching
    # unrelated names that merely end in "g"
    files += [os.path.join(r, file) for file in f
              if file.endswith((".jpg", ".jpeg", ".png"))]

Duplicating and renaming images based on filename range (xxxx-xxxx.jpg)

I have a bunch of images that have filenames that represent a range of values that I need to split into individual images. For example, for an image with the filename 1000-1200.jpg, I need 200 individual copies of the image named 1000.jpg, 1001.jpg, 1002.jpg, etc.
I know a bit of python but any suggestions on the quickest way to go about this would be much appreciated.
EDIT: Here's what I have so far. The only issue is that it strips leading zeros from the filename and I'm not quite sure how to fix that.
import os
from shutil import copyfile
fileList = []
filePath = 'C:\\AD\\Scripts\\to_split'
# Collect the .jpg files first so that the copies created below are not
# picked up by this listing.
for file in os.listdir(filePath):
    if file.endswith(".jpg"):
        fileList.append(file)

for file in fileList:
    # "1000-1200.jpg" -> ["1000", "1200"]
    fileName = os.path.splitext(file)[0].split("-")
    rangeStart = fileName[0]
    rangeEnd = fileName[1]
    # int() drops any leading zeros and str(part) does not put them back.
    for part in range(int(rangeStart), int(rangeEnd)+1):
        copyfile(os.path.join(filePath, file), os.path.join(filePath, str(part) + ".jpg"))
Let's break the problem down:
Step 1. Get all files in folder
Step 2. for each file, Get string from filename
Step 3. split the string into two ints a and b with str.split("-")
Step 4. for x in range(a, b), copy file and set the name of the file as str(x)

python: can i move a file based on part of the name to a folder with that name

I have a directory with a large number of files that I want to move into folders based on part of the file name. My list of files looks like this:
ID1_geneabc_species1.fa
ID1_genexy_species1.fa
ID2_geneabc_species1.fa
ID3_geneabc_species2.fa
ID3_genexy_species2.fa
ID4_genexy_species3.fa
I want to move the files I have into separate folders based on the last part of the file name (species1, species2, species3). The first parts of the file name do not always have the same number of numbers and/or letters but are always in 3 parts separated by an underscore '_'.
This is what I have tried from looking online but it does not work:
import os
import glob
# Every entry in the current directory whose name contains an underscore.
dirs = glob.glob('*_*')
files = glob.glob('*.fa')
for file in files:
    # File name without its extension, e.g. "ID1_geneabc_species1".
    name = os.path.splitext(file)[0]
    # First entry of `dirs` whose text before its first underscore equals the
    # whole `name`; next() raises StopIteration when nothing matches.
    matchdir = next(x for x in dirs if name == x.rsplit('_')[0])
    os.rename(file, os.path.join(matchdir, file))
I have the list of names (species1, species2, species3) in a list in the script below, which correspond to the third part of my file name. I am able to create a set of directories in my current working directory from each of these names. Is there a better way to do this after the following script, like looping through the list of species, matching the file, then moving it into the correct directory? Thanks.
from Bio import SeqIO
import os
import itertools
# to get a list of all the species in genbank file
all_species = [
    seq_record.annotations["organism"]
    for seq_record in SeqIO.parse("sequence.gb", "genbank")
]

# get unique names and change from set to list
Unique_species = set(all_species)
Species = list(Unique_species)

# send to file; the with-block closes the file even if a write fails
with open('speciesnames.txt', 'w') as f:
    for names in Species:
        f.write(names + '\n')

print('There are %d species.' % len(Species))

# make directory for each species, next to this script
path = os.path.dirname(os.path.abspath(__file__))
for names in Species:
    species_dir = os.path.join(path, names)
    # Skip directories left over from an earlier run instead of failing.
    if not os.path.isdir(species_dir):
        os.makedirs(species_dir)
So, you want a function, which gets folder name from file. Then you iterate over files, create dirs which don't exist and move files there. Stuff like that should work out.
def get_dir_name(filename):
    """Return the folder name encoded in *filename*.

    The folder name is the text after the last underscore, cut at the first
    dot that follows it, e.g. "ID1_geneabc_species1.fa" -> "species1".
    A dot earlier in the name does not affect the result, and a name with
    no dot returns everything after the last underscore.
    """
    last_part = filename.rsplit('_', 1)[-1]
    return last_part.split('.', 1)[0]


# The working directory does not change inside the loop, so look it up once.
cwd = os.getcwd()
for f in glob.glob('*.fa'):
    dir_name = os.path.join(cwd, get_dir_name(f))
    print(dir_name)
    # Create the target folder the first time a file needs it.
    if not os.path.exists(dir_name):
        os.mkdir(dir_name)
    os.rename(f, os.path.join(dir_name, f))

Categories