Pandas data layout issue - python

I am trying to put an image's pixel data into a pandas DataFrame to run PCA across it. I think I got it working, but for some reason the layout is off. When I run the following code I get this result:
# Read the image from disk (second argument 1 is OpenCV's colour-read flag).
img = cv2.imread('/Volumes/EXTERNAL/Stitched-Photos-for-Chris/p7_0015_20161005-949am-75m-pass-1.jpg.png',1)
# The first two entries of img.shape are taken as the row and column counts.
row,col = img.shape[:2]
#print(row , col)
# Build one unique label per pixel ("pixel-0", "pixel-1", ...) to use as the row index.
pixel = ['pixel-' + str(i) for i in range(0,row*col)]
# Each band name is wrapped in its own one-element list here.
bBand = ['bBand']
gBand = ['gBand']
rBand = ['rBand']
# NOTE(review): columns is therefore a list of three lists rather than a flat
# list of three strings - presumably the cause of the unexpected column
# layout; confirm against the printed output.
data = pd.DataFrame(columns=[bBand,gBand,rBand],index = pixel)
# Split the image into its three channels (unpacked as b, g, r) and write each
# flattened channel into the column of the same name.
b,g,r = cv2.split(img)
data.loc[pixel,'bBand'] = b.flat[:]
data.loc[pixel,'gBand'] = g.flat[:]
data.loc[pixel,'rBand'] = r.flat[:]
print(data.head())
However, when I run the tutorial code I am basing this off I get the proper format:
# Row labels gene1..gene100 and column labels wt1..wt5 / ko1..ko5.
genes = ['gene' + str(i) for i in range(1,101)]
wt = ['wt' + str(i) for i in range(1,6)]
ko = ['ko' + str(i) for i in range(1,6)]
# Unpacking both label lists makes columns one flat list of ten strings.
data = pd.DataFrame(columns=[*wt,*ko],index = genes)
#create random data
for gene in genes:
    # Each gene draws a fresh random Poisson mean for the wt group and for the ko group.
    data.loc[gene,'wt1':'wt5'] = np.random.poisson(lam=rd.randrange(10,10000),size=5)
    data.loc[gene,'ko1':'ko5'] = np.random.poisson(lam=rd.randrange(10,10000),size=5)
print(data.head())
I am trying to figure out whether the extra gBand and rBand in the columns are an issue, or whether I have an error somewhere. Thanks for your help.

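It looks like you are creating your columns incorrectly: bBand, gBand and rBand are each already a one-element list, so columns=[bBand,gBand,rBand] passes a list of lists, which pandas treats as the levels of one multi-level column rather than as three separate column names. Pass the names as plain strings instead.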
try:
# One label per pixel, used as the row index.
pixel = [f'pixel-{n}' for n in range(row * col)]
band_names = ['bBand', 'gBand', 'rBand']
data = pd.DataFrame(index=pixel, columns=band_names)
# Split the image once, then write each flattened channel into its own column.
b, g, r = cv2.split(img)
for band_name, channel in zip(band_names, (b, g, r)):
    data.loc[pixel, band_name] = channel.flat[:]
print(data.head())

Related

How to save output from tf.random.normal()

# 25 latent vectors (one per cell of the 5x5 grid), each of length latent_dim.
random_vector = tf.random.normal(shape=(25, latent_dim))
I am training my model with the above random vector and saving the outputs as a 5x5 grid in a .jpg file. But since my dataset has 60k images, I am unable to find the corresponding input images.
My question is how can I save the random_vector as a 5x5 grid. Here is the code I used to save output from my model:
def save_images(model, epoch, step, input_):
    """Plot the model's predictions for ``input_`` on a 5x5 grid and save it as a JPEG.

    The figure is written to ``./anime-faces/Epoch_<epoch>_step_<step>.jpg``
    (both numbers zero-padded to four digits); the directory is created if
    it does not exist yet.

    Args:
        model: object whose ``predict(input_)`` result can be indexed 0..24.
        epoch: integer used in the output file name.
        step: integer used in the output file name.
        input_: batch passed unchanged to ``model.predict``.
    """
    prediction = model.predict(input_)
    fig, axes = plt.subplots(5,5, figsize = (14,14))
    # axes.flat visits the grid row by row, i.e. the same order as nested
    # row/column loops with a running index.
    for idx, ax in enumerate(axes.flat):
        # Scale by 255 and cast to int32 before display.
        # NOTE(review): presumably predictions are in [0, 1] - confirm.
        image = prediction[idx] * 255
        image = image.astype("int32")
        ax.imshow(image)
        ax.axis("off")
    output_path = "./anime-faces/"
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(output_path, exist_ok=True)
    plt.savefig(output_path + "Epoch_{:04d}_step_{:04d}.jpg".format(epoch, step))
    plt.close()
I am new to tensorflow and AIML so most of the code is written from different sources on internet, sorry if it is something obvious.
You could use a similar code that you are using for saving the output images, on the input_ that you feed into the function save_images, giving
# Draw the 25 input vectors on a 5x5 grid, mirroring how the outputs are saved.
# NOTE(review): assumes input_ is a NumPy array of shape (25, latent_dim); a
# tf.Tensor would need input_.numpy() first - TODO confirm.
fig, axes = plt.subplots(5,5, figsize = (14,14))  # figsize must be a (width, height) pair
idx = 0
for row in range(5):
    for column in range(5):
        image = input_[idx] * 255
        image = image.astype("int32")
        # imshow needs 2-D data, so each 1-D latent vector is drawn as a one-row strip.
        axes[row, column].imshow(image.reshape(1, -1), aspect="auto")
        axes[row, column].axis("off")
        idx+=1
output_path = "./anime-faces-inputs/"
os.makedirs(output_path, exist_ok=True)
# NOTE(review): this fixed file name is overwritten on every call; to pair each
# input grid with its output grid, presumably the same epoch/step suffix as
# the output file should be used here - confirm what is in scope.
plt.savefig(output_path + ".jpg")
plt.close()

The data(number) I append to DataFrame using panda

I built a CNN model to make predictions, and I append the results to a DataFrame as the picture shows. However, why does my prediction column have two brackets [[ ]] around each value? How can I get rid of them and show only the number?
test_img = 'C:/Users/User/Desktop/GF_BSIF/Circle_Cropped_test_images/*.jpg'
Test_Path = 'C:/Users/User/Desktop/GF_BSIF/Circle_Cropped_test_images'
Name = []
result = []
# Collect every file name in the folder with its last five characters dropped.
# NOTE(review): ".jpg" is only four characters, so this also removes the last
# character of the stem - confirm that is intended.
for name in os.listdir(Test_Path):
    Name.append(name[0:-5])
# Predict on every .jpg and keep the raw return value of model.predict.
# NOTE(review): Name comes from os.listdir and result from glob, so the two
# lists line up only if both enumerate the same files in the same order.
for img in glob.glob(test_img):
    prediction = model.predict(prepare(img))
    result.append(prediction)
Temp = {'File Name':Name, 'Prediction':result}
temp = pd.DataFrame(Temp)
temp
After I change result.append(prediction) to result.append(prediction[0]):
test_img = 'C:/Users/User/Desktop/GF_BSIF/Circle_Cropped_test_images/*.jpg'
Test_Path = 'C:/Users/User/Desktop/GF_BSIF/Circle_Cropped_test_images'
Name = []
result = []
# Collect every file name in the folder with its last five characters dropped.
for name in os.listdir(Test_Path):
    Name.append(name[0:-5])
# Predict on every .jpg and keep only the first element of each prediction,
# which strips one level of nesting from the stored value.
for img in glob.glob(test_img):
    prediction = model.predict(prepare(img))
    result.append(prediction[0])
Temp = {'File Name':Name, 'Prediction':result}
temp = pd.DataFrame(Temp)
temp
The DataFrame shows 1 bracket left.
Any way to remove the bracket?
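It seems your prediction is a 2-D array. Can you print the return type and value of model.predict(), like below? If it turns out to be an array of shape (1, 1), indexing it twice (prediction[0][0]) should give the bare number.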
print(type(prediction))
print(prediction)

How to create a tuple with the same shape of another one

I'm trying to augment my data for a CNN problem.
I have a CSV with 3 images (center, left, right) and a steering angle which is the same for all three images. (I used the Udacity self-driving car simulator.)
I want to increase my data, so I'm trying to do augmentation on my image.
I need to create a tuple with a shape like this:
image_path = data[["center", "left", "right"]].values
But before, I have to augment my data.
That's my code:
steerings = data["steering"].values
steerings = list(steerings)
center = data["center"].values
left = data["left"].values
right = data["right"].values
center = list(center)
new_center = []
left = list(left)
new_left = []
right = list(right)
new_right = []
new_steerings = []
for index in range(len(center)):
new_center_img = augment_image(center[index], steerings[index])[0]
new_center.append(new_center_img)
new_left_img = augment_image(left[index], steerings[index])[0]
new_left.append(new_left_img)
new_right_img = augment_image(right[index], steerings[index])[0]
new_right.append(new_right_img)
new_steerings.append(steerings[index])
center.extend(new_center)
left.extend(new_left)
right.extend(new_right)
steerings.extend(new_steerings)
image_path = tuple(center), tuple(left), tuple(right)
steerings = tuple(steerings)
return train_test_split(image_path, steerings, test_size=0.2, random_state=1)
But:
image_path = tuple(center), tuple(left), tuple(right)
is not the same as:
image_path = data[["center", "left", "right"]].values
How can I obtain the same thing when I return image path, but with double images due to augmentation?
If I understand correctly, I think you just need to do:
image_path = np.stack([center, left, right], axis=1)
Where np is NumPy.

How can I add a list of saved images into an existing dataframe in pandas?

I was hoping somebody would be able to help me. I am trying to store a list of saved images from MatPlotLib as a dataframe (or a list) and then add it to an existing dataframe (effectively creating small barcharts for each entry in the dataframe e.g. databars).
I have managed to save the images successfully with a loop. There are 242 images. How can I show these images in a column in a dataframe? I want it to be easy to append the column to my existing dataframe, to show visually the number of zero values in this dataset. My code gives the error "NoneType object is not iterable".
This is my code. (Top half just here for clarification as to what q1 and q2 are.)
Thanks.
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
# pandas is imported as pd in this script; the raw string keeps the backslash
# in the path literal without relying on an invalid "\q" escape.
q1 = pd.read_csv(r"data\q1.csv") #dataframe
# NOTE(review): the result of apply() is not assigned, so q1 itself is left unstripped.
q1.apply(lambda x: x.str.strip() if x.dtype == "object" else x) #strip whitespace
q1 = q1.dropna()
code = q1.loc[:,"Code"]
region = q1.loc[:,"Region"]
name = q1.loc[:,"Name"]
heads = list(q1.columns.values) #creates list of header values
nz = (q1 == 0).sum(axis=1) #count number of zero values in rows
q2 = q1[['Code','Region','Name']]
q2 = q2.assign(nz=nz.values)
samples=[]
y=1
# Draw one horizontal bar per row, scaled to a percentage of the largest
# zero-count, and save it as value<y>.png.
for val in q2['nz']:
    val = val/q2['nz'].max() * 100
    plt.barh(val, width = val, color="blue")
    plt.xlim((0,100))
    plt.yticks([0])
    plt.axis('off')
    # NOTE(review): plt.savefig returns None, so samples only ever collects
    # None values - this is the behaviour the question is asking about.
    x = plt.savefig("value" + str(y) + ".png", bbox_inches='tight')
    samples.append(x)
    plt.close()
    y = y + 1
imgdf = pd.DataFrame.from_records(samples)
q3 = q2.append(imgdf)
If you are working in a jupyter notebook, then you can use the HTML display to show the images.
# Some imports
import base64
import pandas as pd
from PIL import Image
from io import BytesIO
from IPython.display import HTML
# None means "no column-width limit", so the long base64 <img> strings are not
# truncated; the older -1 spelling is deprecated and rejected by newer pandas.
pd.set_option('display.max_colwidth', None)
def get_thumbnail(path):
    """
    Open the image at ``path`` and return it as a PIL Image shrunk to fit
    within 150x150 pixels (``thumbnail`` resizes in place and keeps the
    aspect ratio, so the result is at most 150 pixels on each side).
    """
    i = Image.open(path)
    i.thumbnail((150, 150), Image.LANCZOS)
    return i
def image_base64(im):
    """
    Convert to base64 to be given as the src field of img in HTML

    ``im`` may be a PIL Image or a path string; a path is first loaded through
    get_thumbnail. Returns the JPEG-encoded image as a base64 text string.
    """
    if isinstance(im, str):
        im = get_thumbnail(im)
    # JPEG cannot store an alpha channel or a palette, and PNGs saved by
    # matplotlib are typically RGBA, so convert those modes to RGB first.
    if im.mode in ("RGBA", "LA", "P"):
        im = im.convert("RGB")
    with BytesIO() as buffer:
        im.save(buffer, 'jpeg')
        # Read the bytes before the buffer is closed on leaving the with-block.
        return base64.b64encode(buffer.getvalue()).decode()
def image_formatter(im):
    """Return an HTML <img> tag that embeds ``im`` as an inline base64 JPEG."""
    return f'<img src="data:image/jpeg;base64,{image_base64(im)}">'
# Skipping some of your code
image_paths = []
for val in q2['nz']:
    #... Do somethings here
    # Build the file name once, save the figure there, and remember the path
    # (not the return value of savefig).
    file_name = "value" + str(y) + ".png"
    plt.savefig(file_name, bbox_inches='tight')
    plt.close()
    image_paths.append(file_name)
    y = y + 1
# The column must be created under the same name it is read back with below.
q2["image_paths"] = pd.Series(image_paths).values
q2["image"] = q2.image_paths.map(get_thumbnail)
# Display PIL Images embedded in the dataframe
HTML(q2.to_html(formatters={"image": image_formatter}, escape=False))

SVM with openCV & Python

I'm trying to build an application that classifies different objects. I have a training folder with a bunch of images I want to use as training data for my SVM.
Up until now I have followed this (GREAT) answer:
using OpenCV and SVM with images
here is a sample of my code:
def getTrainingData():
    """Load every image under ..//data//training as a flattened float32 vector.

    Each sub-directory of the training folder is walked; every file in it is
    read in grayscale, converted to a float32 array and flattened.

    Returns:
        (trainingData, labels): two parallel lists - the flattened images and,
        for each one, the name of the sub-directory it came from.
    """
    address = "..//data//training"
    labels = []
    trainingData = []
    for items in os.listdir(address):
        ## extracts labels
        name = address + "//" + items
        for it in os.listdir(name):
            path = name + "//" + it
            # Parenthesised form prints the same text under Python 2 and 3.
            print(path)
            img = cv.imread(path, cv.CV_LOAD_IMAGE_GRAYSCALE)
            d = np.array(img, dtype = np.float32)
            # The vector length follows the image size, so images of different
            # sizes yield vectors of different lengths.
            q = d.flatten()
            trainingData.append(q)
            labels.append(items)
            ######DEBUG######
            #cv.namedWindow(path,cv.WINDOW_NORMAL)
            #cv.imshow(path,img)
    return trainingData, labels
# Keyword parameters handed to svm.train below (linear kernel, C-SVC type).
svm_params = dict( kernel_type = cv.SVM_LINEAR,
svm_type = cv.SVM_C_SVC,
C=2.67, gamma=3 )
# Load the flattened images and their directory-name labels.
training, labels = getTrainingData()
# NOTE(review): if the flattened images differ in length, this presumably
# becomes an object-dtype array rather than a 2-D float32 matrix - confirm.
train = np.asarray(training)
svm = cv.SVM()
# NOTE(review): labels are still the directory-name strings at this point -
# confirm the trainer accepts them.
svm.train(train, labels, params=svm_params)
# Persist the trained model to svm_data.dat.
svm.save('svm_data.dat')
But when I try to run it I receive the following error:
svm.train(train, labels, params=svm_params)
TypeError: trainData data type = 17 is not supported
What am i doing wrong?
Thanks A lot!
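You should resize your input images so they all have the same size. Images of different sizes flatten to vectors of different lengths, so np.asarray(training) cannot build a regular 2-D float32 matrix and falls back to an object array, which is most likely the unsupported "data type = 17" in the error. For example: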
img = cv2.resize(img, (64,64))
Size is up to you.

Categories