I am a beginner in Python. I have been trying my hands on MatPlotLib to compare the stats of soccer players in FIFA 20. Basically the problem I'm facing is:
def make_graph(value1, value2, namevalue, label1, label2):
print(value1, value2, namevalue)
plt.scatter(value1, value2)
plt.xlabel(label1)
plt.ylabel(label2)
for i in range(len(namevalue)):
plt.text(value1[i] + 0.3, value2[i] + 0.3, namevalue[i], fontdict=dict(color='red', size=10), bbox=dict(facecolor = 'yellow', alpha=0.5))
plt.xlim(min(value1) - 5, max(value2) + 5)
plt.ylim(min(value1) - 5, max(value2) + 5)
plt.show()
def Test():
df = xlrd.open_workbook(path)
data = df.sheet_by_index(0)
data.cell_value(0,0)
name = []
pace = []
shoot = []
for i in range(1, 450):
#print(data.cell_value(i, 3))
buff = str(data.cell_value(i,2)).strip()
if buff == "LM" or buff == "RM":
pacebuffer = int(data.cell_value(i, 4))
shootbuffer = int(data.cell_value(i, 5))
if pacebuffer >= 90:
name.append(data.cell_value(i, 3).strip("\n"))
pace.append(pacebuffer)
shoot.append(shootbuffer)
#print(name)
make_graph(pace, shoot, name, "Pace", "Shoot")
The particular code is showing me an empty graph.
BUT
When I write the same piece of code inside Test() which I wrote inside make_graph() , it gives me the desired output.
But in this way I have to rewrite that plotting thing every time I write some other functions and that's really a problem. Any idea how to fix this?
It is your x and y lims :
plt.xlim(min(value1) - 5, max(value2) + 5)
plt.ylim(min(value1) - 5, max(value2) + 5)
You should change to :
plt.xlim(min(value1) - 5, max(value1) + 5)
plt.ylim(min(value2) - 5, max(value2) + 5)
Technically your plt.scatter was working but then your x and y lims meant that you couldn't see.
Related
I have a data frame like the below:
Every row represents a person. They stay at 3 different locations for some time given on the dataframe. The first few people don't stay at location1 but they "born" at location2. The rest of them stay at every locations (3 locations).
I would like to animate every person at the given X, Y coordinates given on the data frame and represent them as dots or any other shape. Here is the flow:
Every person should appear at the first given location (location1) at the given time. Their color should be blue at this state.
Stay at location1 until location2_time and then appear at location2. Their color should be red at this state.
Stay at location2 until location3_time and then appear at location3. Their color should be red at this state.
Stay at location3 for 3 seconds and disappear forever.
There can be several people on the visual at the same time. How can I do that?
There are some good answers on the below links. However, on these solutions, points don't disappear.
How can i make points of a python plot appear over time?
How to animate a scatter plot?
The following is an implementation with python-ffmpeg, pandas, matplotlib, and seaborn. You can find output video on my YouTube channel (link is unlisted).
Each frame with figures is saved directly to memory. New figures are generated only when the state of the population changes (person appears/moves/disappears).
You should definetely separate this code into smaller chunks if you are using this in a Python package:
from numpy.random import RandomState, SeedSequence
from numpy.random import MT19937
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ffmpeg
RESOLUTION = (12.8, 7.2) # * 100 pixels
NUMBER_OF_FRAMES = 900
class VideoWriter:
# Courtesy of https://github.com/kylemcdonald/python-utils/blob/master/ffmpeg.py
def __init__(
self,
filename,
video_codec="libx265",
fps=15,
in_pix_fmt="rgb24",
out_pix_fmt="yuv420p",
input_args=None,
output_args=None,
):
self.filename = filename
self.process = None
self.input_args = {} if input_args is None else input_args
self.output_args = {} if output_args is None else output_args
self.input_args["r"] = self.input_args["framerate"] = fps
self.input_args["pix_fmt"] = in_pix_fmt
self.output_args["pix_fmt"] = out_pix_fmt
self.output_args["vcodec"] = video_codec
def add(self, frame):
if self.process is None:
height, width = frame.shape[:2]
self.process = (
ffmpeg.input(
"pipe:",
format="rawvideo",
s="{}x{}".format(width, height),
**self.input_args,
)
.filter("crop", "iw-mod(iw,2)", "ih-mod(ih,2)")
.output(self.filename, **self.output_args)
.global_args("-loglevel", "quiet")
.overwrite_output()
.run_async(pipe_stdin=True)
)
conv = frame.astype(np.uint8).tobytes()
self.process.stdin.write(conv)
def close(self):
if self.process is None:
return
self.process.stdin.close()
self.process.wait()
def figure_to_array(figure):
"""adapted from: https://stackoverflow.com/questions/21939658/"""
figure.canvas.draw()
buf = figure.canvas.tostring_rgb()
n_cols, n_rows = figure.canvas.get_width_height()
return np.frombuffer(buf, dtype=np.uint8).reshape(n_rows, n_cols, 3)
# Generate data for the figure
rs1 = RandomState(MT19937(SeedSequence(123456789)))
time_1 = np.round(rs1.rand(232) * NUMBER_OF_FRAMES).astype(np.int16)
time_2 = time_1 + np.round(rs1.rand(232) * (NUMBER_OF_FRAMES - time_1)).astype(np.int16)
time_3 = time_2 + np.round(rs1.rand(232) * (NUMBER_OF_FRAMES - time_2)).astype(np.int16)
loc_1_x, loc_1_y, loc_2_x, loc_2_y, loc_3_x, loc_3_y = np.round(rs1.rand(6, 232) * 100, 1)
df = pd.DataFrame({
"loc_1_time": time_1,
"loc_1_x": loc_1_x,
"loc_1_y": loc_1_y,
"loc_2_time": time_2,
"loc_2_x": loc_2_x,
"loc_2_y": loc_2_y,
"loc_3_time": time_3,
"loc_3_x": loc_3_x,
"loc_3_y": loc_3_y,
})
"""The stack answer starts here"""
# Add extra column for disappear time
df["disappear_time"] = df["loc_3_time"] + 3
all_times = df[["loc_1_time", "loc_2_time", "loc_3_time", "disappear_time"]]
change_times = np.unique(all_times)
# Prepare ticks for plotting the figure across frames
x_values = df[["loc_1_x", "loc_2_x", "loc_3_x"]].values.flatten()
x_ticks = np.array(np.linspace(x_values.min(), x_values.max(), 6), dtype=np.uint8)
y_values = df[["loc_1_y", "loc_2_y", "loc_3_y"]].values.flatten()
y_ticks = np.array(np.round(np.linspace(y_values.min(), y_values.max(), 6)), dtype=np.uint8)
sns.set_theme(style="whitegrid")
video_writer = VideoWriter("endermen.mp4")
if 0 not in change_times:
# Generate empty figure if no person arrive at t=0
fig, ax = plt.subplots(figsize=RESOLUTION)
ax.set_xticklabels(x_ticks)
ax.set_yticklabels(y_ticks)
ax.set_title("People movement. T=0")
video_writer.add(figure_to_array(fig))
loop_range = range(1, NUMBER_OF_FRAMES)
else:
loop_range = range(NUMBER_OF_FRAMES)
palette = sns.color_palette("tab10") # Returns three colors from the palette (we have three groups)
animation_data_df = pd.DataFrame(columns=["x", "y", "location", "index"])
for frame_idx in loop_range:
if frame_idx in change_times:
plt.close("all")
# Get person who appears/moves/disappears
indexes, loc_nums = np.where(all_times == frame_idx)
loc_nums += 1
for i, loc in zip(indexes, loc_nums):
if loc != 4:
x, y = df[[f"loc_{loc}_x", f"loc_{loc}_y"]].iloc[i]
if loc == 1: # location_1
animation_data_df = animation_data_df.append(
{"x": x, "y": y, "location": loc, "index": i},
ignore_index=True
)
else:
data_index = np.where(animation_data_df["index"] == i)[0][0]
if loc in (2, 3): # location_2 or 3
animation_data_df.loc[[data_index], :] = x, y, loc, i
elif loc == 4: # Disappear
animation_data_df.iloc[data_index] = np.nan
current_palette_size = np.sum(~np.isnan(np.unique(animation_data_df["location"])))
fig, ax = plt.subplots(figsize=RESOLUTION)
sns.scatterplot(
x="x", y="y", hue="location", data=animation_data_df, ax=ax, palette=palette[:current_palette_size]
)
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticks)
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks)
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
ax.set_title(f"People movement. T={frame_idx}")
video_writer.add(figure_to_array(fig))
video_writer.close()
Edit: There was a bug in which location_3 wasn't removed after 3 seconds. Fixed now.
Modifying the code from this question to only include the positions you want automatically removes the old ones if the old position isn't included in the new ones. This doesn't change if you want to animate by time or iterations or anything else. I have opted to use iterations here since it's easier and I don't know how you are handling your dataset. The code does have one bug though, the last point (or points if they last the same amount of time) remaining won't disappear, this can be solved easily if you don't want to draw anything again, if you do though for exaple in case you there is a gap in the data with no people and then the data resumes I haven't found any workarounds
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
#The t0,t1,t2,t3 are the times (in iterations) that the position changes
#If t0 is None then the person will never be displayed
people = [
# t0 x1 y1 t1 x2 y2 t2 x3 y3 t4
[ 0, 1, 0.1, 1, 2, 0.2, 2, 3, 0.3, 3],
[ 2, None, None, None, 2, 1, 3, 4, 1, 7],
[ 2, float("NaN"), float("NaN"), float("NaN"), 2, 0.8, 4, 4, 0.8, 10],
]
fig = plt.figure()
plt.xlim(0, 5)
plt.ylim(0, 1)
graph = plt.scatter([], [])
def animate(i):
points = []
colors = []
for person in people:
if person[0] is None or math.isnan(person[0]) or i < person[0]:
continue
# Position 1
elif person[3] is not None and not (math.isnan(person[3])) and i <= person[3]:
new_point = [person[1], person[2]]
color = "b"
# Position 2
elif person[6] is not None and not (math.isnan(person[6])) and i <= person[6]:
new_point = [person[4], person[5]]
color = "r"
# Position 3
elif person[9] is not None and not (math.isnan(person[9])) and i <= person[9]:
new_point = [person[7], person[8]]
color = "r"
else:
people.remove(person)
new_point = []
if new_point != []:
points.append(new_point)
colors.append(color)
if points != []:
graph.set_offsets(points)
graph.set_facecolors(colors)
else:
# You can use graph.remove() to fix the last point not disappiring but you won't be able to plot anything after that
# graph.remove()
pass
return graph
ani = FuncAnimation(fig, animate, repeat=False, interval=500)
plt.show()
I's trying to plot multiple text lines on a plot using arrays. I define xpos[i], ypos[i], text[i], and xval[i], use the following loop to plot the text, then change my slider:
npts = 3
i = 0
mytext = np.zeros(npts,str)
xval = [1, 2, 3]
xval = [Bx, By, Beta]
yinit = 5
ydel = -0.5
xpos = [1, 1, 1]
ypos = [yinit, yinit+ydel, yinit+2*ydel]
text = ['a = %.2f','b = %.2f','c = %.2f']
# The following loop plots the text and works fine
while i < npts:
mytext[i] = plt.text(xpos[i], ypos[i], text[i] % xval[i])
i += 1
svalue.on_changed(update)
My update def has the following line which should update the text based on changes to xval[i]:
def update(Beta):
By = x3*np.tan(Beta * torad)
Bx = x3
line.set_xdata((x1, Bx))
line.set_ydata((y1, By))
npts = 3
i = 0
xval = [Bx, By, Beta]
while i < npts:
mytext[i].set_text(text[i] % xval[i])
i += 1
I get the error:
AttributeError: 'numpy.str_' object has no attribute 'set_text'
I hope this is clear. I'm unable find any references on folks trying use arrays to define multiple plt.text statements.
Thanks.
setting
mytext = [None]*10
based on suggestion by ImportanceOfBeingErnest solved the problem. Thanks.
It's a program of import multiple images and extract feature using dct and histogram.
1) Import multiple images from folder
2) Make image size 256*256
3) Use image of 64*64 block unit with stride = 32
4) Do dct(8*8 size)
5) make histogram of dct
6) Extract features from dct coefficient histogram
The problem is that it's too slow.
I think it's because there's so many "for loop".
This is my full-code in python.
How can I change my code to speed up?
I am not familiar with python.
Please help me
import numpy as np
from scipy.fftpack import dct
from PIL import Image
import glob
import matplotlib.pyplot as plt
def find_index(x,key):
for i in range(0,len(x)):
if x[i] == key :
return i
else:
i = i+1
def image_open(path):
image_list = []
#for filename in glob.glob('path/*.jpg'):
for filename in glob.glob(path+'/*.jpg'):
im=Image.open(filename)
image_list.append(im)
return image_list
def dct_2(img):
#Get 2D Cosine Transform of Image
return dct(dct(np.asarray(img).T, norm='ortho').T, norm='ortho')
def return_array(array):
zero = [0.0, 0.0, 0.0, 0.0, 0.0]
range = int((max(array)) - min(array))
x, bins, patch = plt.hist(array, bins=range)
x = list(zero) + list(x) + list(zero)
return x
path = 'C:\\Users\\LG\\PycharmProjects\\photo' #folder that contains many images
images = image_open(path)
row = 0
array_matrix = []
label_matrix = []
for i in range(0, len(images)): #access image
box3 = (0,0,256,256)
a = images[i].crop(box3)
(y,cb,cr) = a.split() #ycbcr
width , height = y.size
y.show()
for q in range(0, height-32 , 32): #use image 64*64 block unit
for w in range(0 , width-32 ,32):
box1 =(q,w,q+64,w+64)
block = y.crop(box1)
array1 , array2 , array3 , array4 , array5 , array6 , array7 , array8 ,array9 = [],[],[],[],[],[],[],[],[]
for j in range(0,64,8): #dct
for n in range(0,64,8):
box2 = (j,n,j+8,n+8)
temp = block.crop(box2)
dct_temp = dct_2(temp)
array1.append(dct_temp[0,1])
array2.append(dct_temp[1,0])
array3.append(dct_temp[0,2])
array4.append(dct_temp[1,1])
array5.append(dct_temp[2,0])
array6.append(dct_temp[0,3])
array7.append(dct_temp[1,2])
array8.append(dct_temp[2,1])
array9.append(dct_temp[3,0])
x1 = return_array(array1) #extract feature from dct histogram
index = find_index(x1, max(x1))
u = [index - 5, index + 5, 1]
array_matrix.append(x1[u[0]:u[1] + 1:u[2]])
x2 = return_array(array2)
index = find_index(x2, max(x2))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x2[u[0]:u[1] + 1:u[2]])
x3 = return_array(array3)
index = find_index(x3, max(x3))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x3[u[0]:u[1] + 1:u[2]])
x4 = return_array(array4)
index = find_index(x4, max(x4))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x4[u[0]:u[1] + 1:u[2]])
x5 = return_array(array5)
index = find_index(x5, max(x5))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x5[u[0]:u[1] + 1:u[2]])
x6 = return_array(array6)
index = find_index(x6, max(x6))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x6[u[0]:u[1] + 1:u[2]])
x7 = return_array(array7)
index = find_index(x7, max(x7))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x7[u[0]:u[1] + 1:u[2]])
x8 = return_array(array8)
index = find_index(x8, max(x8))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x8[u[0]:u[1] + 1:u[2]])
x9 = return_array(array9)
index = find_index(x9, max(x9))
u = [index - 5, index + 5, 1]
array_matrix[row].extend(x9[u[0]:u[1] + 1:u[2]])
print(w/32)
row = row+1
print(array_matrix)
Rather than assuming that a specific section is taking longer than others, I'd recommend profiling your script. A profiler will collect metrics on how long certain parts of your program takes, and also allow you to better see how much any changes affect the code (makes it better, worse, etc).
Once you know where your problem lies, then you can take a more targeted approach at making it faster.
Have a look at the profiling module: https://docs.python.org/2/library/profile.html
Also have a look at some tutorials:
https://julien.danjou.info/blog/2015/guide-to-python-profiling-cprofile-concrete-case-carbonara
https://zapier.com/engineering/profiling-python-boss/
https://marcobonzanini.com/2015/01/05/my-python-code-is-slow-tips-for-profiling/
I've got two musical files: one lossless with little sound gap (at this time it's just silence but it could be anything: sinusoid or just some noise) at the beginning and one mp3:
In [1]: plt.plot(y[:100000])
Out[1]:
In [2]: plt.plot(y2[:100000])
Out[2]:
This lists are similar but not identical so I need to cut this gap, to find the first occurrence of one list in another with lowest delta error.
And here's my solution (5.7065 sec.):
error = []
for i in range(25000):
y_n = y[i:100000]
y2_n = y2[:100000-i]
error.append(abs(y_n - y2_n).mean())
start = np.array(error).argmin()
print(start, error[start]) #23057 0.0100046
Is there any pythonic way to solve this?
Edit:
After calculating the mean distance between special points (e.g. where data == 0.5) I reduce the area of search from 25000 to 2000. This gives me reasonable time of 0.3871s:
a = np.where(y[:100000].round(1) == 0.5)[0]
b = np.where(y2[:100000].round(1) == 0.5)[0]
mean = int((a - b[:len(a)]).mean())
delta = 1000
error = []
for i in range(mean - delta, mean + delta):
...
What you are trying to do is a cross-correlation of the two signals.
This can be done easily using signal.correlate from the scipy library:
import scipy.signal
import numpy as np
# limit your signal length to speed things up
lim = 25000
# do the actual correlation
corr = scipy.signal.correlate(y[:lim], y2[:lim], mode='full')
# The offset is the maximum of your correlation array,
# itself being offset by (lim - 1):
offset = np.argmax(corr) - (lim - 1)
You might want to take a look at this answer to a similar problem.
Let's generate some data first
N = 1000
y1 = np.random.randn(N)
y2 = y1 + np.random.randn(N) * 0.05
y2[0:int(N / 10)] = 0
In these data, y1 and y2 are almost the same (note the small added noise), but the first 10% of y2 are empty (similarly to your example)
We can now calculate the absolute difference between the two vectors and find the first element for which the absolute difference is below a sensitivity threshold:
abs_delta = np.abs(y1 - y2)
THRESHOLD = 1e-2
sel = abs_delta < THRESHOLD
ix_start = np.where(sel)[0][0]
fig, axes = plt.subplots(3, 1)
ax = axes[0]
ax.plot(y1, '-')
ax.set_title('y1')
ax.axvline(ix_start, color='red')
ax = axes[1]
ax.plot(y2, '-')
ax.axvline(ix_start, color='red')
ax.set_title('y2')
ax = axes[2]
ax.plot(abs_delta)
ax.axvline(ix_start, color='red')
ax.set_title('abs diff')
This method works if the overlapping parts are indeed "almost identical". You will have to think of smarter alignment ways if the similarity is low.
I think what you are looking for is correlation. Here is a small example.
import numpy as np
equal_part = [0, 1, 2, 3, -2, -4, 5, 0]
y1 = equal_part + [0, 1, 2, 3, -2, -4, 5, 0]
y2 = [1, 2, 4, -3, -2, -1, 3, 2]+y1
np.argmax(np.correlate(y1, y2, 'same'))
Out:
7
So this returns the time-difference, where the correlation between both signals is at its maximum. As you can see, in the example the time difference should be 8, but this depends on your data...
Also note that both signals have the same length.
I am new to Python and I still don't know what exactly Qimage's pixel returns (it seems to a tupel of rgb or rgba -the lack of type declaration doesn't help)
I want to grab each each pixel and change it.
newqim = QImage(imWidth, imHeight, QImage.Format_ARGB32)
for xstep in range(0, imWidth - 1):
for ystep in range(0, imHeight - 1):
pixelValueTuple = im.getpixel((xstep, ystep))
pixelR = pixelValueTuple[0]
pixelG = pixelValueTuple[1]
pixelB = pixelValueTuple[2]
copiedValue = qRgb(pixelR, pixelG, pixelB)
newqim.setPixel(xstep, ystep, copiedValue)
Above is the provided code ,I thought I then iterate over that newqim, but I can't get a handle on how exactly I would do that in Python.
for xstep in range(0, imWidth-1):
for ystep in range(0, imHeight -1):
I'm not sure I understood what you want, but since you are new to Python, here go a few tips...
Unpacking
This code:
pixelR = pixelR[0]
pixelG = pixelValueTuple[1]
pixelB = pixelValueTuple[2]
Is the same as:
pixelR, pixelG, pixelB = pixelValueTuple[:3]
If you are sure len(pixelValueTuple) == 3, then it is just:
pixelR, pixelG, pixelB = pixelValueTuple
PEP-8
A bit of nitpick, but python guys tend to be a little nazy about syntax. Please read PEP-8. From now on I'm naming variables according to it (camelCase for instance variables just hurt my eyes %-).
Range
You probably want range(width) instead of range(0, width-1).
>>> range(10)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> range(0, 10 - 1)
[0, 1, 2, 3, 4, 5, 6, 7, 8]
Now back to your problem.
width, height = 300, 300
im = QImage(width, height, QImage.Format_ARGB32)
for x in range(im.width()):
for y in range(im.height()):
r, g, b, a = QColor(im.pixel(x ,y)).getRgb()
# ... do something to r, g, b, a ...
im.setPixel(x, y, QColor(r, g, b, a).rgb())
Example
width, height = 100, 100
im = QImage(width, height, QImage.Format_ARGB32)
for x in range(im.width()):
for y in range(im.height()):
im.setPixel(x, y, QColor(255, x * 2.56, y * 2.56, 255).rgb())
im.save('sample.png')
Result: