I am trying to convert any .png images with a transparent background to a white background.
However, I am getting an error that says 'tuple' object is not callable.
I have tried this:
def transparent_to_white(img):
color = (255, 255, 255)
for x in range(img.size()):
for y in range(img.size()):
r, g, b, a = img.getpixel((x, y))
if a == 0:
img.putpixel((x, y), color)
return img
but I get this error:
Original Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/content/gdrive/My Drive/All_Deep_Learning/PythonCustomLibraries/pix2pixdatasetlib.py", line 49, in __getitem__
y_label = self.resize(transparent_to_white(y_label))
File "/content/gdrive/My Drive/All_Deep_Learning/PythonCustomLibraries/pix2pixdatasetlib.py", line 33, in transparent_to_white
for x in range(img.size()):
TypeError: 'tuple' object is not callable
I am calling it in my dataset class:
class Pix2PixDataset(Dataset):
def __init__(self, data_points, transforms = None):
self.data_points = data_points
self.transforms = transforms
self.resize = T.Resize((512,512))
def __getitem__(self, index) :
image, y_label = process_images(self.data_points[index].reference_image, self.data_points[index].drawing )
image = self.resize(image)
y_label = self.resize(transparent_to_white(y_label))
if self.transforms:
image = self.transforms(image)
y_label = self.transforms(y_label)
return(image, y_label)
def __len__(self):
return len(self.data_points)
I tried removing the opening and closing parentheses, but that did not help; I still get the same error:
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/content/gdrive/My Drive/All_Deep_Learning/PythonCustomLibraries/pix2pixdatasetlib.py", line 49, in __getitem__
y_label = self.resize(transparent_to_white(y_label))
File "/content/gdrive/My Drive/All_Deep_Learning/PythonCustomLibraries/pix2pixdatasetlib.py", line 33, in transparent_to_white
for x in range(img.size()):
TypeError: 'tuple' object is not callable
Disclaimer: I'm assuming img is an instance of the Image class from the PIL module or its fork, Pillow.
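img.size is an attribute that holds a tuple, not a method, so img.size() tries to call that tuple and raises the TypeError. For example, if you do: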
print(img.size)
It prints a tuple with (width, height).
So, your code could be
def transparent_to_white(img):
    """Paint every fully transparent pixel of ``img`` white, in place.

    Args:
        img: An image object exposing ``size`` (a ``(width, height)`` tuple),
            ``getpixel((x, y))`` returning four values per pixel, and
            ``putpixel((x, y), color)``; presumably a PIL/Pillow ``Image``
            in RGBA mode.

    Returns:
        The same ``img`` object, after modification.
    """
    color = (255, 255, 255)
    # ``size`` is a tuple attribute, not a method: unpack it instead of calling it.
    width, height = img.size
    for x in range(width):
        for y in range(height):
            r, g, b, a = img.getpixel((x, y))
            # Only pixels whose alpha is exactly 0 are replaced.
            if a == 0:
                img.putpixel((x, y), color)
    return img
Or, alternatively, you could store x and y into a tuple of coordinates, to simplify passing it around:
def transparent_to_white(img):
    """Paint every fully transparent pixel of ``img`` white, in place.

    Variant that builds the coordinate tuple once per pixel and reuses it
    for both the read and the write.

    Args:
        img: An image object exposing ``size`` (a ``(width, height)`` tuple),
            ``getpixel(coords)`` returning four values per pixel, and
            ``putpixel(coords, color)``; presumably a PIL/Pillow ``Image``
            in RGBA mode.

    Returns:
        The same ``img`` object, after modification.
    """
    color = (255, 255, 255)
    # ``size`` is a tuple attribute, not a method: unpack it instead of calling it.
    width, height = img.size
    for x in range(width):
        for y in range(height):
            coords = (x, y)  # shared by getpixel and putpixel below
            r, g, b, a = img.getpixel(coords)
            # Only pixels whose alpha is exactly 0 are replaced.
            if a == 0:
                img.putpixel(coords, color)
    return img
Related
I'm following a YT tutorial, and I feel I've copied the code exactly, but keep getting this error:
AttributeError: Caught AttributeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "C:\Users\Connor\Anaconda3\lib\site-packages\torch\utils\data\_utils\worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "C:\Users\Connor\Anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "C:\Users\Connor\Anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "C:\Users\Connor\OneDrive\Python\Kaggle\Facial Keypoint Detecetion\dataset.py", line 21, in __getitem__
image = np.array(self.data.iloc[index, 30].split()).astype(np.float32)
AttributeError: 'numpy.float64' object has no attribute 'split'
Below is the full code:
class FacialKeypointDataset(Dataset):
def __init__(self, csv_file, train=True, transform=None):
super().__init__()
self.data = pd.read_csv(csv_file)
self.category_names = ['left_eye_center_x', 'left_eye_center_y', 'right_eye_center_x', 'right_eye_center_y', 'left_eye_inner_corner_x', 'left_eye_inner_corner_y', 'left_eye_outer_corner_x', 'left_eye_outer_corner_y', 'right_eye_inner_corner_x', 'right_eye_inner_corner_y', 'right_eye_outer_corner_x', 'right_eye_outer_corner_y', 'left_eyebrow_inner_end_x', 'left_eyebrow_inner_end_y', 'left_eyebrow_outer_end_x', 'left_eyebrow_outer_end_y', 'right_eyebrow_inner_end_x', 'right_eyebrow_inner_end_y', 'right_eyebrow_outer_end_x', 'right_eyebrow_outer_end_y', 'nose_tip_x', 'nose_tip_y', 'mouth_left_corner_x', 'mouth_left_corner_y', 'mouth_right_corner_x', 'mouth_right_corner_y', 'mouth_center_top_lip_x', 'mouth_center_top_lip_y', 'mouth_center_bottom_lip_x', 'mouth_center_bottom_lip_y']
self.transform = transform
self.train = train
def __len__(self):
return self.data.shape[0]
def __getitem__(self, index):
if self.train:
image = np.array(self.data.iloc[index, 30].split()).astype(np.float32)
labels = np.array(self.data.iloc[index, :30].tolist())
labels[np.isnan(labels)] = -1
else:
image = np.array(self.data.iloc[index, 1].split()).astype(np.float32)
labels = np.zeros(30)
ignore_indices = labels == -1
labels = labels.reshape(15, 2)
if self.transform:
image = np.repeat(image.reshape(96, 96, 1), 3, 2).astype(np.uint8)
augmentations = self.transform(image=image, keypoints=labels)
image = augmentations["image"]
labels = augmentations["keypoints"]
labels = np.array(labels).reshape(-1)
labels[ignore_indices] = -1
return image, labels.astype(np.float32)
if __name__ == "__main__":
ds = FacialKeypointDataset(csv_file="data/train_4.csv", train=True, transform=config.train_transforms)
loader = DataLoader(ds, batch_size=1, shuffle=True, num_workers=0)
for idx, (x, y) in enumerate(loader):
plt.imshow(x[0][0].detach().cpu().numpy(), cmap='gray')
plt.plot(y[0][0::2].detach().cpu().numpy(), y[0][1::2].detach().cpu().numpy(), "go")
plt.show()
In the tutorial it has the same lines of code, but no error. Here is the link to the GitHub repository:
https://github.com/aladdinpersson/Machine-Learning-Collection/tree/master/ML/Kaggles/Facial%20Keypoint%20Detection%20Competition
Any ideas what might be causing this?
The code seems to expect that the value returned by self.data.iloc[index, 30] will always be a string.
That might be OK for the project you are basing your code on, but if you pass a CSV file that has floats instead of strings in that column, it will result in the error that you got.
Convert the data to float with .astype(np.float32) first. For example,
self.data = pd.read_csv(csv_file)
self.data = self.data.astype(np.float32)
Or
self.data = pd.read_csv(csv_file, dtype=np.float64)
If you get an error from this conversion, it means your csv_file contains non-numeric data such as strings, and this program cannot be used on your input data.
An error occurred when I tried to resize an image with "interpolation=cv2.INTER_CUBIC". I don't know what happened. I am just following the guide at https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_geometric_transformations/py_geometric_transformations.html?highlight=resize
# from PIL import Image
import cv2
def read_img(frompath):
# return Image.open(frompath)
return cv2.imread(frompath)
def resize_one(img, size, outpath):
# out = img.resize(size)
# out.save(outpath)
out = cv2.resize(img, size)
cv2.imwrite(outpath, out, interpolation=cv2.INTER_CUBIC)
def resize_all(img, tasks):
for (size, outpath) in tasks:
resize_one(img, size, outpath)
def build_tasks(prefix, sizes):
t = []
for (x, y) in sizes:
t.append(((x, y), prefix + '_' + str(x) + '_' + str(y) + '.png'))
return t
def square_tasks(widths):
t = []
for w in widths:
t.append((w, w))
return t
def main():
s = [72, 48, 96, 144, 192]
p = 'logo'
i = './logo_1280.png'
t = build_tasks(p, square_tasks(s))
img = read_img(i)
resize_all(img, t)
if __name__ == '__main__':
main()
libpng warning: iCCP: known incorrect sRGB profile
Traceback (most recent call last):
File ".\main.py", line 39, in <module>
main()
File ".\main.py", line 36, in main
resize_all(img, t)
File ".\main.py", line 16, in resize_all
resize_one(img, size, outpath)
File ".\main.py", line 12, in resize_one
cv2.imwrite(outpath, out, interpolation=cv2.INTER_CUBIC)
TypeError: 'interpolation' is an invalid keyword argument for this function
I'm using opencv-contrib-python 4.2.0.34
You have to pass interpolation to cv2.resize instead of to cv2.imwrite; imwrite only saves the image and has no such keyword argument.
def resize_one(img, size, outpath):
    """Resize ``img`` to ``size`` with bicubic interpolation and save it.

    Args:
        img: Image array as returned by ``cv2.imread``.
        size: Target size passed straight to ``cv2.resize``.
        outpath: Destination file path handed to ``cv2.imwrite``.
    """
    # The interpolation mode belongs to the resize step; imwrite only
    # serialises the already-resized array and accepts no such keyword.
    out = cv2.resize(img, size, interpolation=cv2.INTER_CUBIC)
    cv2.imwrite(outpath, out)
import numpy as np
from PIL import ImageGrab
import cv2
import time
import pyautogui
import matplotlib.pyplot as plt
def make_coords(img,line_param):
slope,intercept=line_param
y1 = img.shape[0]
y2 = int((y1*(3/5)))
x1 = int((y1-intercept)/slope)
x2 = int((y2-intercept)/slope)
try:
return np.array((x1,y1,x2,y2)) #HERE IS WHERE THE PROBLEM HAPPENS
except UnboundLocalError:
pass
def avg_slope(img,lines):
left_fit =[]
right_fit=[]
if lines is not None:
for line in lines:
x1,y1,x2,y2=line.reshape(4)
parameters = np.polyfit((x1,x2),(y1,y2),1)
try:
slope = parameters[0]
except TypeError:
slope = 0
try:
intercept = parameters[1]
except TypeError:
intercept = 0
if slope <0:
left_fit.append((slope,intercept))
else:
right_fit.append((slope,intercept))
if left_fit:
left_fit_avg=np.average(left_fit,axis=0)
left_line=make_coords(img,left_fit_avg)
if right_fit:
right_fit_avg=np.average(right_fit,axis=0)
right_line=make_coords(img,right_fit_avg)
return np.array((x1,y1,x2,y2))
def draw_lines(img, lines):
try:
for line in lines:
if line is not None:
coords = line[0]
cv2.line(img, (coords[0],coords[1]), (coords[2],coords[3]), [255,0,0], 3)
except:
pass
def roi(img):
vertices = np.array([[10,500],[10,300], [300,200], [500,200], [800,300], [800,500]], np.int32)
mask = np.zeros_like(img)
cv2.fillPoly(mask, [vertices], 255)
masked = cv2.bitwise_and(img, mask)
return masked
def process_img(image):
original_image = image
# convert to gray
processed_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# edge detection
processed_img = cv2.GaussianBlur(processed_img,(5,5),0) #new
processed_img = cv2.Canny(processed_img, threshold1 = 50, threshold2=150) #new
# processed_img = cv2.Canny(processed_img, threshold1 = 200, threshold2=300)
lines = cv2.HoughLinesP(processed_img, 1, np.pi/180, 180, np.array([]), minLineLength=15,maxLineGap=5)
avg_lines = avg_slope(processed_img,lines)
draw_lines(process_img,avg_lines)
processed_img = roi(processed_img)
return processed_img
def main():
last_time = time.time()
while True:
screen = np.array(ImageGrab.grab(bbox=(0,40,800,640)))
if screen is not None:
new_screen = process_img(screen)
print('Frame took {} seconds'.format(time.time()-last_time))
cv2.imshow('window', new_screen)
else:
pass
last_time = time.time()
# plt.imshow(new_screen)
#cv2.imshow('window',cv2.cvtColor(screen, cv2.COLOR_BGR2RGB))
# cv2.waitKey(0)
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
main()
THE TERMINAL SHOWS:
avg_lines = avg_slope(processed_img,lines)
Frame took 0.12310576438903809 seconds
Traceback (most recent call last):
File "c:/Users/Nicole/Documents/Python Scripts/matetest.py", line 107, in <module>
main()
File "c:/Users/Nicole/Documents/Python Scripts/matetest.py", line 91, in main
new_screen = process_img(screen)
File "c:/Users/Nicole/Documents/Python Scripts/matetest.py", line 78, in process_img
avg_lines = avg_slope(processed_img,lines)
File "c:/Users/Nicole/Documents/Python Scripts/matetest.py", line 50, in avg_slope
return np.array((x1,y1,x2,y2))
UnboundLocalError: local variable 'x1' referenced before assignment
... even though I'm doing ...
try:
return np.array((x1,y1,x2,y2))
except UnboundLocalError:
pass
Your error is actually not occurring where you say it is. By looking at the traceback you can see that the error is occurring in the function avg_slope.
It is probably because you use return np.array((x1,y1,x2,y2)) while in that function you have only assigned these values inside an if statement. If the if block is skipped (when lines is None), or the for loop inside it runs zero times (when lines is empty), then x1, x2, y1 and y2 are never assigned in the function. In other words: these names may never exist inside the function, so you can't return something that depends on them. The interpreter prevents you from doing this.
You can learn a lot by just reading the error message carefully. Local variable referenced before assignment is in a nutshell what I explained above.
Your problem is here:
def avg_slope(img,lines):
left_fit =[]
right_fit=[]
if lines is not None:
for line in lines:
x1,y1,x2,y2=line.reshape(4)
If lines is "falsey" (empty or None), you never assign to x1.
The following code uses audio files to create a matrix of features in tensorflow:
import tensorflow as tf
directory = "audio_dataset/*.wav"
filenames = tf.train.match_filenames_once(directory)
init = (tf.global_variables_initializer(), tf.local_variables_initializer())
count_num_files = tf.size(filenames)
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)
with tf.Session() as sess:
sess.run(init)
num_files = sess.run(count_num_files)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
for i in range(num_files):
audio_file = sess.run(filename)
print(audio_file)
this is a toolkit that converts audio from time to frequency domain:
from bregman.suite import *
chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)
def get_next_chromogram(sess):
audio_file = sess.run(filename)
F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
return F.X
def extract_feature_vector(sess, chromo_data):
num_features, num_samples = np.shape(chromo_data)
freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))
return hist.astype(float) / num_samples
def get_dataset(sess):
num_files = sess.run(count_num_files)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
xs = []
for _ in range(num_files):
chromo_data = get_next_chromogram(sess)
x = [extract_feature_vector(sess, chromo_data)]
x = np.matrix(x)
if len(xs) == 0:
xs = x
else:
xs = np.vstack((xs, x))
return xs
this clusters the data around two centroids:
k = 2
max_iterations = 100
def initial_cluster_centroids(X, k):
return X[0:k, :]
def assign_cluster(X, centroids):
expanded_vectors = tf.expand_dims(X, 0)
expanded_centroids = tf.expand_dims(centroids, 1)
distances = tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroids)), 2)
mins = tf.argmin(distances, 0)
return mins
def recompute_centroids(X, Y):
sums = tf.unsorted_segment_sum(X, Y, k)
counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
return sums / counts
with tf.Session() as sess:
sess.run(init)
X = get_dataset(sess)
centroids = initial_cluster_centroids(X, k)
i, converged = 0, False
while not converged and i < max_iterations:
i += 1
Y = assign_cluster(X, centroids)
centroids = sess.run(recompute_centroids(X, Y))
print(centroids)
But I'm getting the following traceback:
Traceback (most recent call last):
File "components.py", line 776, in <module>
X = get_dataset(sess)
File "ccomponents.py", line 745, in get_dataset
chromo_data = get_next_chromogram(sess)
File "coffee_components.py", line 728, in get_next_chromogram
F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features.py", line 143, in __init__
Features.__init__(self, arg, feature_params)
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 70, in __init__
self.extract()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 213, in extract
self.extract_funs.get(f, self._extract_error)()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 711, in _chroma
if not self._cqft():
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 588, in _cqft
self._make_log_freq_map()
File "/Volumes/Dados/Documents/Education/Programming/Machine Learning/Manning/book/BregmanToolkit-master/bregman/features_base.py", line 353, in _make_log_freq_map
mxnorm = P.empty(self._cqtN) # Normalization coefficients
TypeError: 'float' object cannot be interpreted as an index
As far as I can tell, range is an int and not a float.
Could someone please point me to the error here?
The problem is that you're using Python 3, but the Bregman Toolkit was written in Python 2. The error comes from this line:
mxnorm = P.empty(self._cqtN)
self._cqtN is a float. In Python 2, the pylab library accepts floats as input:
pylab.empty(5.0)
__main__:1: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
array([ 0., 0., 0., 0., 0.])
However, in Python 3 you get an error like the one you are seeing:
pylab.empty(5.0)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'float' object cannot be interpreted as an integer
You should be able to fix this error by just editing the line in the file I linked above and cast it to an int:
mxnorm = P.empty(int(self._cqtN))
However, I'd be surprised if there weren't any other errors due to the incompatible versions. You might want to try using Python 2 or look for an alternative to the Bregman Toolkit.
You need to cast self._cqtN to int on lines 353 and 357 of features_base.py.
They become:
mxnorm = P.empty(int(self._cqtN))
and
for i in P.arange(int(self._cqtN))])
I need to flip a picture horizontally without using the reverse function. I thought I had it right, but the error I get is:
Traceback (most recent call last):
File "<pyshell#9>", line 1, in <module>
Flip("bm.gif","bm.ppm")
File "C:\Users\....ImageProcessingSKLT.py", line 133, in Flip
pic1 = graphics.getPixel(x,y)
AttributeError: 'module' object has no attribute 'getPixel'
The code I have is
def Flip(image1, image2):
img = graphics.Image(graphics.Point(0, 0), image1)
X = img.getWidth()
Y = img.getHeight()
for y in range(Y//2):
for x in range(X):
pic1 = graphics.getPixel(x,y)
pic2 = graphics.setPixel(X-x,y)
temp = graphics.getColor(pic1)
graphics.setColor(pic1,getColor(pic2))
graphics.setColor(pic2,temp)
image2 = pic2
return image2
What does the error mean? and how do I fix it?
pic1 = graphics.getPixel(x,y)
pic2 = graphics.setPixel(X-x,y)
Probably should be:
pic1 = img.getPixel(x,y)
pic2 = img.setPixel(X-x,y)
The interpreter is complaining that it can't find the getPixel function inside the module graphics; it's img.getPixel, not graphics.getPixel.