I have a set of images from which I want to create sub-images of size 128*128 using a sliding window with a given stride. Each original image must be at least this size in both rows and columns. I have written the following function:
def sliding_window(image, stride, imgSize):
    """Cut ``image`` into square tiles of side ``imgSize``.

    Tile origins are spaced ``stride`` pixels apart along both axes. When the
    regular grid does not end exactly on the image border, the last origin on
    that axis is moved back so the final tile ends on the border.

    Args:
        image: array indexed as ``[row, column, ...]``.
        stride: distance in pixels between consecutive tile origins (> 0).
        imgSize: side length of every tile.

    Returns:
        A list of tile copies ordered top-to-bottom, then left-to-right.
        The list is empty when the image is smaller than ``imgSize`` along
        either axis.

    Raises:
        ValueError: if ``stride`` is not positive.
    """
    if stride <= 0:
        raise ValueError("stride must be a positive integer")
    height, width = image.shape[:2]
    # An image smaller than the window would otherwise produce a negative
    # origin and therefore tiles of the wrong size.
    if height < imgSize or width < imgSize:
        return []
    rows = _window_origins(height, stride, imgSize)
    cols = _window_origins(width, stride, imgSize)
    # np.array(...) copies each tile so it does not alias the source image.
    return [
        np.array(image[y:y + imgSize, x:x + imgSize])
        for y in rows
        for x in cols
    ]


def _window_origins(extent, stride, size):
    """Return the tile start offsets along one axis of length ``extent``."""
    origins = list(range(0, extent - size + stride, stride))
    if origins[-1] + size != extent:
        # Snap the last tile back so it ends exactly on the border.
        origins[-1] = extent - size
    return origins
and the main code snippet from where I call this definition :
# Sequential driver: tile every sufficiently large image, pre-process each
# tile and record one class label per tile.
im_counter = 0
image_data = []
image_label = []
for cl in file_images:
    for img_file in data[cl]:
        path = img_path + cl + "/" + img_file
        im = image.img_to_array(image.load_img(path))
        im_counter += 1
        if im_counter % 500 == 0:
            print("{} images processed...".format(im_counter))
        if im.shape[0] >= SIZE and im.shape[1] >= SIZE:
            for tile in sliding_window(im, STRIDE, SIZE):
                if tile.shape[2] >= 3:
                    image_data.append(preprocess_input(tile))
                    # The label goes into image_label; the original called
                    # .append on the `image` module, which has no such method.
                    image_label.append(class_dictionary[cl])
                    # No gc.collect() here: a full collection per tile
                    # dominated the run time and reference counting already
                    # frees the temporaries.
Now, the above code snippet takes forever to run on only 3000 images (at least 25 hours, while utilizing only 1 CPU core). I want to make this faster. I have server access and the CPU has many cores, so can you please suggest a parallelized version of it that runs faster?
NOTE: The order in which the sub-images are returned from the original image matters very much; an arbitrary order is not acceptable.
Here is a rough outline of something you can try.
def main():
    """Process all images in a worker pool and collect tiles and labels.

    One ``(path, class info)`` job is built per image file. The pool runs
    ``handle_one_image`` on each job and ``starmap`` hands the results back in
    job order, so tiles are appended to ``image_data`` / ``image_label`` in
    the same order as the files were listed.
    """
    jobs = [
        (img_path + cl + "/" + img_file, class_dictionary[cl])
        for cl in file_images
        for img_file in data[cl]
    ]
    with multiprocessing.Pool(processes=30) as pool:  # or however many processes
        image_counter = 0
        # Each result is (list of tiles, the class info passed in with the job).
        for images, info in pool.starmap(handle_one_image, jobs):
            for sub_image in images:
                image_counter += 1
                image_data.append(sub_image)
                image_label.append(info)
def handle_one_image(path, info):
    """Load one image file and return its pre-processed tiles.

    Args:
        path: location of the image file.
        info: value passed through unchanged so the caller can pair it with
            the returned tiles.

    Returns:
        ``(tiles, info)``. ``tiles`` is empty when the image is smaller than
        ``SIZE`` in either dimension.
    """
    im = image.img_to_array(image.load_img(path))
    if im.shape[0] < SIZE or im.shape[1] < SIZE:
        # indicate that no images are available
        return [], info
    image_data = [
        preprocess_input(tile)
        for tile in sliding_window(im, STRIDE, SIZE)
        if tile.shape[2] >= 3
    ]
    return image_data, info
Related
I'm trying to convert some transparent PNGs to one animated GIF, but there is a trimming issue. Except for the first image, all other images' outermost space with only black colour are cropped and become transparent.
e.g. the red part of the PNG will be cut away in the generated GIF:
example pic for trimming issue of animated GIF
Below is my code. Sorry if it is a bit messy cause I am still learning Python.
from PIL import Image
import glob
# https://stackoverflow.com/questions/46850318/transparent-background-in-gif-using-python-imageio
def gen_frame(path):
    """Load an image and convert it to a palette frame with a transparent index.

    The image is quantised to 255 adaptive colours, leaving palette index 255
    free. Every pixel whose alpha is 0 is painted with index 255, which is then
    declared as the transparent index in ``im.info``.

    Args:
        path: location of the image file to open.

    Returns:
        The converted ``P``-mode image.
    """
    im = Image.open(path)
    # getchannel('A') raises ValueError on images without an alpha band, so
    # give those an alpha band first.
    if 'A' not in im.getbands():
        im = im.convert('RGBA')
    alpha = im.getchannel('A')
    # Convert the image into P mode but only use 255 colors in the palette out of 256
    im = im.convert('RGB').convert('P', palette=Image.Palette.ADAPTIVE, colors=255)
    # Mask is 255 where alpha is 0 (fully transparent) and 0 everywhere else
    mask = Image.eval(alpha, lambda a: 255 if a <= 0 else 0)
    # Paste the color of index 255 wherever the mask is set
    im.paste(255, mask)
    # The transparency index is 255
    im.info['transparency'] = 255
    return im
def resize4Twitter(img):
    """Shrink ``img`` to fit inside 1280x1080 while keeping its aspect ratio.

    Images strictly smaller than the limit in both dimensions are returned
    unchanged. Otherwise the side that overflows relative to the limit's aspect
    ratio is pinned to its maximum and the other side is scaled to match.
    """
    max_w, max_h = 1280, 1080
    if img.width < max_w and img.height < max_h:
        return img
    if img.width/img.height > max_w/max_h:
        # Relatively wider than the limit: width is the binding side.
        new_size = (max_w, img.height / img.width * max_w)
    else:
        # Relatively taller (or equal): height is the binding side.
        new_size = (img.width / img.height * max_h, max_h)
    return img.resize((int(new_size[0]), int(new_size[1])))
### User Input
imagePath = input("Enter PNG path:")
gifName = input("Enter GIF name:")

# Keep asking until a whole number between 1 and 50 is entered.
fps = 0
while fps == 0:
    fpsStr = input("Enter FPS [1-50]:")
    if fpsStr.isdigit() and 1 <= int(fpsStr) <= 50:
        fps = int(fpsStr)
    else:
        print("Invalid. Please enter an integer from 1 to 50.")

forTwitter = ''
while forTwitter != "Y" and forTwitter != "N":
    forTwitter = input("Resize for Twitter? [Y/N]: ")

### filepaths
fp_in = imagePath + "\\" + gifName + "_*.png"
details = "_fps" + str(fps)
if forTwitter == "Y":
    details = details + "_twitterSize"
fp_out = imagePath + "\\" + gifName + details + ".gif"

### Process Images
# https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#gif
# https://legacy.imagemagick.org/script/command-line-options.php?#dispose
imgpaths = sorted(glob.glob(fp_in))
if not imgpaths:
    # Without this guard the script died with StopIteration when taking the
    # first frame from an empty sequence.
    raise SystemExit("No PNG files found matching: " + fp_in)

imgs = []
for imgpath in imgpaths:
    img = gen_frame(imgpath)
    if forTwitter == "Y":
        img = resize4Twitter(img)
    imgs.append(img)
    print("Image loaded:\t" + imgpath)

dur = 1000/fps  # frame duration in milliseconds
# The first frame is the one being saved; the rest are appended to it.
imgs[0].save(fp_out, save_all=True, append_images=imgs[1:],
             optimize=False, duration=dur, loop=0, disposal=2)  # use disposal to clear prev. frame
print("Animated GIF produced at: " + fp_out)
With the following code I augmented some images, and I would like to know how I can make each generated image carry the corresponding label.
I tried save_prefix = " " + label, but it just saves every image under the same name.
I appreciate any help or comment.
def get_augmt(image, model):
    """Collect six augmented batches of ``image`` produced by ``model.flow``.

    The batches are also written to the 'probando' directory as JPEG files,
    using the module-level ``label`` as part of the file-name prefix.

    Returns:
        The list of six batches, or ``None`` if the flow ends before six
        batches were produced.
    """
    augmented = []
    batches = model.flow(x=image,
                         batch_size=1,
                         save_to_dir='probando',
                         save_prefix=" " + label,
                         save_format='jpeg',
                         subset=None)
    for produced, batch in enumerate(batches, start=1):
        augmented.append(batch)
        if produced > 5:
            return augmented
# Store the list of augmented batches for every entry of the "images" column.
pr["img_aug"] = pr["images"].apply(lambda img: get_augmt(img, datagen))
Every image gets the same name because `label` is read as a global variable and is never changed inside the function.
If you are able to attach a label to each image in pr["images"], it will be easier to access it from the function get_augmt.
Otherwise you can use i instead, and the code would be:
def get_augmt(image, model):
    """Collect six augmented batches of ``image``, saved with numbered prefixes.

    ``model.flow`` is called once, so the ``save_prefix`` argument alone is
    evaluated only once and would give every file the same prefix. To number
    the files, the prefix stored on the returned iterator is refreshed before
    each new batch is requested.

    Args:
        image: array passed to ``model.flow`` as ``x``.
        model: object providing ``flow(...)``.
            NOTE(review): assumes the returned iterator reads its
            ``save_prefix`` attribute each time it writes a file - confirm
            for the Keras version in use.

    Returns:
        The list of six batches, or ``None`` if the flow ends before six
        batches were produced.
    """
    i = 0
    img_list = []
    batches = model.flow(x=image,
                         batch_size=1,
                         save_to_dir='probando',
                         save_prefix=" " + str(i),
                         save_format='jpeg',
                         subset=None)
    for batch in batches:
        i += 1
        img_list.append(batch)
        if i > 5:
            return img_list
        # Takes effect for the batch fetched on the next loop iteration.
        batches.save_prefix = " " + str(i)
# Store the list of augmented batches for every entry of the "images" column.
pr["img_aug"] = pr["images"].apply(lambda img: get_augmt(img, datagen))
I have a JSON file with the following structure:
json
{'featureId': 'ckek0ugf2061y0ybwgunbdrt5',
'schemaId': 'ckek0jkvp081j0yaec2ap9a3w',
'title': 'Tree',
'value': 'tree',
'color': '#FFFF00',
'instanceURI': 'https://api.labelbox.com/masks/feature/ckek0ugf2061y0ybwgunbdrt5?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...'}
instanceURI points to one tree that I segmented from the original image using Labelbox (https://labelbox.com/). I am using PSPNet-cityscapes. That model requires a mask in PNG format for the validation stage. Some images have several trees (several instanceURIs).
How can I convert this JSON element in a png image?
Not the fastest and most beautiful script - but it works for me...
from PIL import Image, ImageColor, ImageDraw
from PIL import UnidentifiedImageError
import requests
import json
import argparse
import pathlib
import os.path
def manual_classes():
    """
    Return the grey value assigned to each label title for 'manual' colouring.

    Edit the mapping below to change the colour coding; the keys have to match
    the label titles.
    """
    return {'Tree': 255, 'Flower': 85}
def open_img(url):
    """Download ``url`` and open the response body as a PIL image.

    Args:
        url: address of the image to fetch.

    Returns:
        The opened image, or ``None`` when the request fails or the response
        cannot be identified as an image.
    """
    try:
        # A timeout keeps one unresponsive server from hanging the whole run.
        response = requests.get(url, stream=True, timeout=30)
        return Image.open(response.raw)
    except (UnidentifiedImageError, requests.RequestException):
        # Callers already treat None as "not readable", so a network failure
        # is reported the same way as an unreadable image.
        return None
def open_json(path):
    """Read the JSON document stored at ``path`` and return the parsed data.

    The file is decoded as UTF-8 rather than with the platform's default
    encoding, so non-ASCII text is read the same way on every system.
    """
    with open(path, encoding="utf-8") as file:
        return json.load(file)
def color_extractor(data, color_coding):
    """Return the RGBA colour for one label entry.

    ``color_coding`` selects the source of the colour:
    'auto' parses ``data['color']``, 'manual' looks ``data['title']`` up in
    ``manual_classes()`` and uses that value for R, G and B, 'binar' yields
    opaque white. Any other value prints a notice and also yields opaque white.
    """
    white = (255,255,255,255)
    if color_coding == 'auto':
        return ImageColor.getcolor(data['color'], 'RGBA')
    if color_coding == 'manual':
        grey = manual_classes()[data['title']]
        return (grey, grey, grey, 255)
    if color_coding != 'binar':
        print('no valid color-code detected - continue with binarized Labels.')
    return white
def img_color(img, color):
    """Repaint every opaque-white pixel of ``img`` with ``color``.

    When ``color`` itself is opaque white there is nothing to change and the
    image is returned untouched. Otherwise the image is converted to RGBA and
    scanned pixel by pixel.
    """
    white = (255,255,255,255)
    if color == white:
        return img
    img = img.convert('RGBA')
    columns, rows = img.size
    for col in range(columns):
        for row in range(rows):
            position = (col, row)
            if img.getpixel(position) == white:
                img.putpixel(position, color)
    return img
def img_draw_polygon(size, polygon, color):
    """Draw ``polygon`` filled with ``color`` on a new transparent RGBA image.

    Args:
        size: size of the image to create, passed to ``Image.new``.
        polygon: sequence of mappings with 'x' and 'y' entries, one per vertex.
        color: fill colour for the polygon.

    Returns:
        The image containing the filled polygon.
    """
    canvas = Image.new('RGBA', size, (0,0,0,0)).convert('RGBA')
    # Vertex coordinates are truncated to integers.
    vertices = [(int(vertex['x']), int(vertex['y'])) for vertex in polygon]
    ImageDraw.Draw(canvas).polygon(vertices, fill=(color))
    return canvas
def progressBar(current, total, barLength = 20):
    """Print a one-line text progress bar for ``current`` out of ``total``.

    The line ends with a carriage return so the next call overwrites it.
    """
    percent = float(current) * 100 / total
    dash_count = int(percent/100 * barLength - 1)
    arrow = '-' * dash_count + '>'
    padding = ' ' * (barLength - len(arrow))
    print('Progress: [%s%s] %d %%' % (arrow, padding, percent), end='\r')
def main(input_dir, output_dir, color_type='auto'):
    """Convert every Labelbox JSON export in ``input_dir`` into a PNG mask.

    For each ``*.json`` file, every labelled object is rendered as its own
    layer: the mask behind ``instanceURI`` when it can be opened, otherwise
    the object's polygon drawn at the size of the original image. The layers
    are pasted on top of the first one and the result is written to
    ``output_dir`` under the JSON file's name with a ``.png`` extension.

    Args:
        input_dir: existing directory containing the JSON exports.
        output_dir: existing directory receiving the PNG files.
        color_type: 'auto', 'manual' or 'binar' (see ``color_extractor``).
    """
    valid_color_types = ['auto', 'manual', 'binar']
    inputs_ok = (os.path.exists(input_dir) and os.path.exists(output_dir)
                 and color_type in valid_color_types)
    if not inputs_ok:
        print('One of your given inputs is incorrect - please try again.')
        return

    input_path = pathlib.Path(input_dir)
    for image_path in sorted(input_path.glob("*.json")):
        print('converting: {}'.format(os.path.basename(image_path)))
        # open json file
        data = open_json(image_path)
        objects = data[0]['Label']['objects']

        # read original image; its size is only needed for the polygon fallback
        original_img = open_img(data[0]['Labeled Data'])
        if original_img is None:
            # Reset explicitly so a size left over from the previous file is
            # never reused for this one.
            size = None
            print('Original image data not callable. Please provide image width and height.')
        else:
            size = original_img.size

        # create image list for Labels
        img_list = []
        for i, obj in enumerate(objects):
            # read path and open image
            img = open_img(obj['instanceURI'])
            if img is not None:
                img_list.append(img_color(img, color_extractor(obj, color_type)))
            else:
                # if path is not readable try to read polygon-data-points
                try:
                    img_list.append(img_draw_polygon(size, obj['polygon'], color_extractor(obj, color_type)))
                except Exception:
                    # Deliberate best effort: one unusable label must not
                    # abort the whole conversion.
                    print('Note: There are no available polygon-data-points & web-data-information for Label #{}.'.format(i))
            # print current progress status
            progressBar(i, len(objects))

        if not img_list:
            # Nothing was rendered; indexing img_list[0] would raise.
            print('No label could be rendered for {} - skipping.'.format(os.path.basename(image_path)))
            continue

        img = img_list[0]
        for layer in img_list[1:]:
            img.paste(layer, (0,0), mask=layer)
        # os.path.join inserts the separator that plain concatenation dropped
        # when output_dir has no trailing slash.
        img.save(os.path.join(output_dir, image_path.with_suffix('.png').name))
if __name__ == '__main__':
    # Command-line entry point: parse the directories and colour mode, then
    # run the conversion.
    parser = argparse.ArgumentParser(description="convert annotations from labelbox2png")
    # Both directories are mandatory; a missing one would otherwise reach
    # os.path.exists() as None and raise TypeError.
    parser.add_argument("--input", required=True, help="input-directory")
    parser.add_argument("--output", required=True, help="output-directory")
    # Default to 'auto' so omitting --color matches main()'s own default
    # instead of passing None, which main() rejects.
    parser.add_argument("--color", default='auto', help="binar, auto or manual")
    args = parser.parse_args()
    main(args.input, args.output, args.color)
To run it, save this Python script and execute it from your command line:
C:\Users>python script.py --input input_directory/ --output output_directory --color auto
With the --color option you can change the colour coding of your labels: auto takes the colours from the JSON, manual uses the mapping in manual_classes() (which you have to edit yourself), and binar renders every label in white.
I have randomly sized rectangular images that I want to crop into a square along the shortest side, resize them into the input shape, and feed into a neural network. I know a lot of Image augmentation techniques are supported in the keras.preprocessing.image.ImageDataGenerator but I have not seen any techniques for cropping square images.
Does a keras function exist for this? If not, is it possible to use an external cropping function and connect that somewhere between the ImageDataGenerator and flow_from_directory method?.
I have also tried building my own generator, but this takes substantially longer than previous trials with the flow_from_directory method.
My current workflow:
image_shape = (224, 224, 3)


def im_crop(image):
    """Centre-crop a PIL image to a square along its shorter side.

    The crop keeps the full shorter dimension and takes the middle section of
    the longer one (the offset is rounded down when the difference is odd).
    Cropping is done by the image itself, so it works for any image mode, not
    only images that convert to a three-dimensional array.

    Args:
        image: object exposing ``size`` as ``(width, height)`` and ``crop(box)``.

    Returns:
        The square crop returned by ``image.crop``.
    """
    width, height = image.size
    shortest = min(width, height)
    offset = (max(width, height) - shortest) // 2
    if width == shortest:
        # Width is the short side: trim rows from top and bottom.
        box = (0, offset, width, offset + shortest)
    else:
        # Height is the short side: trim columns from left and right.
        box = (offset, 0, offset + shortest, height)
    return image.crop(box)
def im_rescale(image, shape):
    """Resize ``image`` and return it as an array matching ``shape``.

    Args:
        image: object exposing ``resize((width, height))``.
        shape: target array shape; ``shape[0]`` is the number of rows and
            ``shape[1]`` the number of columns.

    Returns:
        ``np.array`` of the resized image.
    """
    # resize() takes (width, height), i.e. (columns, rows), so the first two
    # entries of the array shape are swapped. For square targets this is
    # identical to passing them unswapped.
    im_resized = image.resize((shape[1], shape[0]))
    return np.array(im_resized)
def getFilesLabels(path):
    """List every file below ``path`` together with its class label.

    The label of a file is the name of the directory that contains it.
    Files whose name contains ".DS_Store" are dropped.

    Args:
        path: root directory to walk.

    Returns:
        DataFrame with columns ``path`` (full file path), ``fname`` (file
        name), ``label`` (parent directory name) and ``num_label`` (index of
        the label in the alphabetically sorted list of labels).
    """
    records = [
        (os.path.join(dirpath, fname), fname, os.path.basename(dirpath))
        for dirpath, _dirnames, filenames in os.walk(path)
        for fname in filenames
    ]
    df = pd.DataFrame(records, columns=['path', 'fname', 'label'])
    # regex=False: match the literal text instead of treating "." as a
    # wildcard. .copy() makes the filtered frame independent before a column
    # is added to it.
    df = df[~df['fname'].str.contains(".DS_Store", regex=False)].copy()
    # Sort the labels so the numbering is the same for every directory tree
    # with the same classes and for every run; iterating a plain set gives no
    # such guarantee.
    class_index = {name: idx for idx, name in enumerate(sorted(set(df['label'])))}
    df['num_label'] = [class_index[name] for name in df['label']]
    return df
def get_input(path):
    """Open the image file at ``path`` and return the image object."""
    return Image.open(path)
def get_output(path, label_file=None):
    """Return the ``num_label`` of the first row of ``label_file`` whose
    ``path`` column equals ``path``."""
    matching = label_file.loc[label_file['path'] == path, 'num_label']
    return matching.iloc[0]
def val_preprocess_input(image, shape):
    """Crop a validation image with ``im_crop`` and rescale the result with
    ``im_rescale`` to ``shape``."""
    return im_rescale(im_crop(image), shape)
def train_preprocess_input(image, shape):
    """Crop a training image with ``im_crop`` and rescale the result with
    ``im_rescale`` to ``shape``."""
    return im_rescale(im_crop(image), shape)
def image_generator(df, shape, batch_size=32):
    """Endlessly yield ``(inputs, labels)`` batches drawn from ``df``.

    For every batch, ``batch_size`` entries of ``df['path']`` are drawn with
    ``np.random.choice``; each image is loaded, its label looked up in ``df``,
    and the image pre-processed to ``shape``.

    Yields:
        Tuple of two arrays: the stacked pre-processed images and their labels.
    """
    while True:
        # Select files (paths/indices) for the batch
        chosen_paths = np.random.choice(a=df['path'],
                                        size=batch_size)
        samples, targets = [], []
        # Read in each input, perform preprocessing and get labels
        for sample_path in chosen_paths:
            picture = get_input(sample_path)
            target = get_output(sample_path, label_file=df)
            samples.append(train_preprocess_input(image=picture, shape=shape))
            targets.append(target)
        # Return a tuple of (input, output) to feed the network
        yield (np.array(samples), np.array(targets))
# Index the training and validation folders, then wrap each in a batch generator.
train_dir = os.path.join(output_path, "Images/Network Input/train")
val_dir = os.path.join(output_path, "Images/Network Input/val")
train_set = getFilesLabels(train_dir)
val_set = getFilesLabels(val_dir)
train_generator = image_generator(train_set, image_shape)
val_generator = image_generator(val_set, image_shape)
# Generators are then fed to keras.model instance
I'm creating an image popularity algorithm that cuts an .mp4 video into frames. With the help of AI, the program examines which frames probably display the most beautiful images; the result of this is expressed as a 'score'.
This works but I encounter a problem. Because certain frames in a video are very similar, I have many frames with (almost) the same score.
In the end result, a list is generated with [score, frame number]. I want, for example, if 3 items in the list are almost identical frame numbers and therefore (almost) identical scores, I only keep the frame number in the list with the highest score in order to remove duplicates.
It has something to do with this line: result.append((predict(pil_image, model), name))
Here is the code:
import os
import torch
import torchvision.models
import torchvision.transforms as transforms
from PIL import Image
import json
import cv2
def prepare_image(image):
    """Turn a PIL image into a model input tensor.

    The image is converted to RGB if it is in another mode, resized to
    224x224, converted to a tensor, and given a leading dimension via
    ``unsqueeze(0)``.
    """
    if image.mode != 'RGB':
        image = image.convert("RGB")
    pipeline = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
    ])
    return pipeline(image).unsqueeze(0)
def predict(image, model):
    """Score ``image`` with ``model`` and return the score as text.

    The score is rounded to two decimals. A progress line containing the score
    and the module-level ``framesToDo`` counter is printed as a side effect.

    Returns:
        The rounded score as a string.
    """
    tensor = prepare_image(image)
    with torch.no_grad():
        preds = model(tensor)
    score = preds.detach().numpy().item()
    rounded = str(round(score, 2))
    print("Picture score: " + rounded + " | frames left: " +str(framesToDo))
    return rounded
if __name__ == '__main__':
    # Load the scoring model: ResNet-50 with a single-output head, on CPU.
    model = torchvision.models.resnet50()
    model.fc = torch.nn.Linear(in_features=2048, out_features=1)
    model.load_state_dict(torch.load('model/model-resnet50.pth', map_location=torch.device('cpu')))
    model.eval()

    result = []
    framestep = 500  # for Stackoverflow example

    # The video folder holds videos named by number.
    # NOTE(review): range(1, 23) stops at 22.mp4 although the original
    # comment mentions 23 - confirm the intended upper bound.
    for i in range(1, 23):
        vidcap = cv2.VideoCapture('./video/' + str(i) + '.mp4')
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))  # maximum amount of frames in video
        framesToDo = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
        count = 0
        succes, vidcap_image = vidcap.read()
        while succes and count < total_frames:
            name = str(i) + '_' + str(count)
            cv2.imwrite("./frames_saved/" + 'vid' + '_' + name + ".jpg", vidcap_image)  # save frame as jpg image
            count += framestep  # 500 frames further
            framesToDo = framesToDo - framestep
            cv2_image = cv2.cvtColor(vidcap_image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(cv2_image)
            result.append((predict(pil_image, model), name))
            # read() alone returns the very next frame, so without seeking the
            # "500 frames further" samples were consecutive, near-identical
            # frames. Jump to the frame number that `count` now holds.
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, count)
            succes, vidcap_image = vidcap.read()
        vidcap.release()

    # predict() returns the score as text; sort by its numeric value so that
    # e.g. "10.5" does not come before "9.2".
    result.sort(key=lambda entry: (float(entry[0]), entry[1]))
    print(result)
    with open('result.json', 'w') as filehandle:
        filehandle.write(json.dumps(result))
Since there is no reproducible example, you will have to adapt this to your own code. It analyses each frame's data, skips unnecessary frames, updates the best value, and appends new values.
# Fragment meant to replace the plain `result.append(...)` inside the frame
# loop: it keeps only the best-scoring entry among frames that lie close
# together. It expects `count`, `pil_image`, `model` and a list `results`
# to exist in the surrounding code.
MAX_FRAME_NUMBER_DIFF = 60
MAX_SCORE_DIFF = 0.5

current_frame = count
# Make sure the score is numeric before comparing and subtracting, in case
# predict() returns it as text.
current_score = float(predict(pil_image, model))
data = (current_score, current_frame)

if not results:
    results.append(data)
else:
    last_score, last_frame = results[-1]
    is_similar_frame = current_frame - last_frame <= MAX_FRAME_NUMBER_DIFF
    is_score_better = current_score > last_score
    # "Way better" means the improvement exceeds MAX_SCORE_DIFF; the original
    # comparison used <= and therefore had the two branches swapped.
    is_score_way_better = current_score - last_score > MAX_SCORE_DIFF
    if not is_similar_frame:
        results.append(data)
    elif is_score_better:
        if is_score_way_better:  # diff between current and previous score bigger than MAX_SCORE_DIFF
            results.append(data)
        else:  # current score better than previous, but not by much
            results[-1] = data  # update last value
    # Otherwise the current score is not better than the previous one and the
    # frame is simply dropped. No `continue` is used, so statements that
    # follow this fragment in the enclosing loop (such as reading the next
    # frame) still run.