Can someone help me correct the code below so that it visualizes this data as an animated matplotlib plot?
The datasets for the X and Y axes are described below.
X- Range
mydata.iloc[:,[4]].head(10)
Min_pred
0 1.699189
1 0.439975
2 2.989244
3 2.892075
4 2.221990
5 3.456261
6 2.909323
7 -0.474667
8 -1.629343
9 2.283976
Y - range
dataset_meteo.iloc[:,[2]].head(10)
Out[122]:
Min
0 0.0
1 -1.0
2 2.0
3 -2.0
4 -4.0
5 -4.0
6 -5.0
7 -7.0
8 -3.0
9 -1.0
I've tried the code below,
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
# Question code (not working as posted): load a spreadsheet and try to animate
# one of its columns with seaborn + FuncAnimation.
# NOTE(review): `pd` is used below but no pandas import is visible in this snippet.
d = pd.read_excel("mydata.xls")
x = np.array(d.index)
y = np.array(d.iloc[:,[2]])
mydata = pd.DataFrame(y,x)
fig = plt.figure(figsize=(10,6))
plt.xlim(1999, 2016)
# NOTE(review): the y-limits are computed from `x` (the index values), not from `y`.
plt.ylim(np.min(x), np.max(x))
plt.xlabel('Year',fontsize=20)
# NOTE(review): `title` is not defined anywhere in the visible snippet.
plt.ylabel(title,fontsize=20)
plt.title('Meteo Paris',fontsize=20)
# Frame callback: plots the first i+1 rows of `mydata` as a red seaborn line.
def animate(i):
data = mydata.iloc[:int(i+1)] #select data range
# NOTE(review): `sns` is used but no seaborn import is visible in this snippet.
p = sns.lineplot(x=data.index, y=data[title], data=data, color="r")
p.tick_params(labelsize=17)
plt.setp(p.lines,linewidth=7)
# NOTE(review): the visible imports bind only the aliases `plt` and `animation`;
# the bare name `matplotlib` used here is not bound by them.
ani = matplotlib.animation.FuncAnimation(fig, animate, frames=17, repeat=True)
The idea is to create a graph in which the predicted values (Y) are animated,
similar to the one in the link below.
https://www.courspython.com/animation-matplotlib.html
Thanks if you can help
Is this what you are trying to get?
# Demo data: one random value per year, 1999..2016 inclusive.
x = np.arange(1999, 2017)
y = np.random.random(size=x.shape)

fig = plt.figure(figsize=(4, 3))
plt.xlim(1999, 2016)
plt.ylim(np.min(y), np.max(y))
plt.xlabel('Year', fontsize=20)
plt.ylabel('Y', fontsize=20)
plt.title('Meteo Paris', fontsize=20)
plt.tick_params(labelsize=17)

# Create the line once, empty; every frame only replaces its data instead of
# adding a new artist to the axes.
line, = plt.plot([], [], 'r-', lw=7)


def animate(i):
    """Show the first ``i + 1`` points of the series on ``line``.

    ``i`` is the frame number supplied by ``FuncAnimation``.  Returns a
    one-element tuple holding the updated line artist.
    """
    line.set_data(x[:i + 1], y[:i + 1])
    return line,


# Use the `animation` alias bound by `import matplotlib.animation as animation`.
# The bare name `matplotlib` is never bound by the imports in use here, so
# `matplotlib.animation.FuncAnimation` would raise NameError.
# Keep the result in a variable: the animation stops if the object is
# garbage-collected.
ani = animation.FuncAnimation(fig, animate, frames=len(x), repeat=True)
Related
I have a 3D scatterplot that looks like "tubes", which is in fact what it should display. Currently every "tube" consists of 40 markers. What I am trying to achieve is for these 40 markers together to form a cylinder that looks like a tube, with the positions taken from X, Y and Z and the coloration from C.
# Map the DataFrame columns onto plot roles: three position columns and a colour value.
X = df['Tube']
Y = df['Window']
C = df['Value']
Z = df['Depth']
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# One marker per row, coloured by C through the 'Reds' colormap, with black marker edges.
ax.scatter3D(X,Y,Z, marker='o',s=50, c=C, cmap = 'Reds',edgecolors= "black")
df
Tube Window Value Depth
0 1 1 0.000383 -0.1
1 1 2 0.023253 -0.1
2 1 3 0.022623 -0.1
3 1 4 0.003599 -0.1
4 1 5 0.001281 -0.1
... ... ... ... ...
2155 54 36 0.020977 -1.2
2156 54 37 0.000000 -1.2
2157 54 38 0.007104 -1.2
2158 54 39 0.015233 -1.2
2159 54 40 0.000000 -1.2
Does anybody have an idea how this might be possible?
It seems to work with mayavi.mlab.
from mayavi.mlab import *
from mayavi import mlab
from PyQt5 import QtWidgets
# Pull the three position columns and the colour scalar out of the frame in one go.
X, Y, Z, C = (df[column] for column in ('Tube', 'Window', 'Depth', 'Value'))

# Draw the points as a tube of radius 0.25 coloured by C, then open the viewer.
# The call goes through the explicit `mlab` namespace rather than the
# star-imported name.
mlab.plot3d(X, Y, Z, C, tube_radius=0.25, colormap='Reds')
mlab.show()
I'm trying to plot the data (see below), with company_name on the x-axis, status_mission_2_y on one y-axis and percentage on the other y-axis. I have tried using the twinx() function but I can't get it to work.
Please can you help? Thanks in advance!
def twinplot(data):
    """Draw columns 2 and 3 of ``data`` as bar plots on twin y-axes.

    The first column of ``data`` supplies the x categories.  The second
    column is drawn on the left axis and the third on a right-hand axis
    created with ``twinx()``.  The figure is shown before returning.
    """
    category, left_col, right_col = (data.columns[i] for i in range(3))

    plt.figure(figsize=(15, 8))
    left_ax = sns.barplot(x=category, y=left_col,
                          data=data[[category, left_col]])
    right_ax = left_ax.twinx()
    sns.barplot(x=category, y=right_col,
                data=data[[category, right_col]], ax=right_ax)
    plt.show()
data = ten_company_missions_failed
twinplot(data)
company_name
percentage
status_mission_2_y
EER
1
1
Ghot
1
1
Trv
1
1
Sandia
1
1
Test
1
1
US Navy
0.823529412
17
Zed
0.8
5
Gov
0.75
4
Knight
0.666666667
3
Had
0.666666667
3
Seaborn plots the two bar plots with the same color and at the same x-positions.
The following example code shrinks the bar widths, moving the bars belonging to ax to the left and the bars of ax2 to the right. To differentiate the right bars, semi-transparency (alpha=0.7) and hatching are used.
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import pandas as pd
import seaborn as sns
from io import StringIO
data_str = '''company_name percentage status_mission_2_y
EER 1 1
Ghot 1 1
Trv 1 1
Sandia 1 1
Test 1 1
"US Navy" 0.823529412 17
Zed 0.8 5
Gov 0.75 4
Knight 0.666666667 3
Had 0.666666667 3'''
# Whitespace-separated table; the quoted "US Navy" stays a single field.
# `sep=r'\s+'` is the supported spelling of the deprecated
# `delim_whitespace=True`.
data = pd.read_csv(StringIO(data_str), sep=r'\s+')

# Column roles: categories, left-axis values, right-axis values.
x_ = data.columns[0]
y_ = data.columns[1]
y_2 = data.columns[2]
data1 = data[[x_, y_]]
data2 = data[[x_, y_2]]

plt.figure(figsize=(15, 8))
ax = sns.barplot(x=x_, y=y_, data=data1)

# Fraction of the original bar width that each of the two bar sets keeps.
width_scale = 0.45

# Left-axis bars: keep the left edge, only narrow them.
for bar in ax.containers[0]:
    bar.set_width(bar.get_width() * width_scale)
ax.yaxis.set_major_formatter(PercentFormatter(1))

ax2 = ax.twinx()
sns.barplot(x=x_, y=y_2, data=data2, alpha=0.7, hatch='xx', ax=ax2)

# Right-axis bars: narrow them and shift them so their right edge stays put.
for bar in ax2.containers[0]:
    x = bar.get_x()
    w = bar.get_width()
    bar.set_x(x + w * (1 - width_scale))
    bar.set_width(w * width_scale)
plt.show()
A simpler alternative could be to combine a barplot on ax and a lineplot on ax2.
# Alternative layout: first value column as bars on the left axis,
# second value column as a line with markers on a twin right axis.
fig, ax = plt.subplots(figsize=(15, 8))
sns.barplot(x=x_, y=y_, data=data1, ax=ax)
ax.yaxis.set_major_formatter(PercentFormatter(1))

ax2 = ax.twinx()
line_style = {'marker': 'o', 'color': 'crimson', 'lw': 3}
sns.lineplot(x=x_, y=y_2, data=data2, ax=ax2, **line_style)
plt.show()
I'm trying to interpolate temperature data observed in an urban area made up of 5 localities. I am using cartopy to interpolate and draw the map; however, when I run the script the interpolated temperature is not shown and I only get the layer of the urban area with the color palette. Can someone help me fix this error? The links to the shapefile are:
https://www.dropbox.com/s/0u76k3yegvr09sx/LimiteAMG.shp?dl=0
https://www.dropbox.com/s/yxsmm3v2ey3ngsp/LimiteAMG.cpg?dl=0
https://www.dropbox.com/s/yx05n31dfkggbb6/LimiteAMG.dbf?dl=0
https://www.dropbox.com/s/a6nk0xczgjeen2d/LimiteAMG.prj?dl=0
https://www.dropbox.com/s/royw7s51n2f0a6x/LimiteAMG.qpj?dl=0
https://www.dropbox.com/s/7k44dcl1k5891qc/LimiteAMG.shx?dl=0
Data
Lat Lon tmax
0 20.8208 -103.4434 22.8
1 20.7019 -103.4728 17.7
2 20.6833 -103.3500 24.9
3 20.6280 -103.4261 NaN
4 20.7205 -103.3172 26.4
5 20.7355 -103.3782 25.7
6 20.6593 -103.4136 NaN
7 20.6740 -103.3842 25.8
8 20.7585 -103.3904 NaN
9 20.6230 -103.4265 NaN
10 20.6209 -103.5004 NaN
11 20.6758 -103.6439 24.5
12 20.7084 -103.3901 24.0
13 20.6353 -103.3994 23.0
14 20.5994 -103.4133 25.0
15 20.6302 -103.3422 NaN
16 20.7400 -103.3122 23.0
17 20.6061 -103.3475 NaN
18 20.6400 -103.2900 23.0
19 20.7248 -103.5305 24.0
20 20.6238 -103.2401 NaN
21 20.4753 -103.4451 NaN
Code:
import cartopy
import cartopy.crs as ccrs
from matplotlib.colors import BoundaryNorm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cartopy.io.shapereader as shpreader
from metpy.calc import get_wind_components
from metpy.cbook import get_test_data
from metpy.gridding.gridding_functions import interpolate, remove_nan_observation
from metpy.plots import add_metpy_logo
from metpy.units import units
# Question code: read station temperatures, interpolate them to a grid and draw
# the grid over the urban-area shapefile.
to_proj = ccrs.PlateCarree()
# Columns 1-3 of the CSV are read as Lat/Lon/tmax; -99999 is treated as missing.
data=pd.read_csv('/home/borisvladimir/Documentos/Datos/EMAs/EstacionesZMG/RedZMG.csv',usecols=(1,2,3),names=['Lat','Lon','tmax'],na_values=-99999,header=0)
fname='/home/borisvladimir/Dropbox/Diversos/Shapes/LimiteAMG.shp'
adm1_shapes = list(shpreader.Reader(fname).geometries())
lon = data['Lon'].values
lat = data['Lat'].values
# Station coordinates transformed from geodetic lon/lat into the map projection.
xp, yp, _ = to_proj.transform_points(ccrs.Geodetic(), lon, lat).T
# NOTE(review): the import at the top of this snippet spells this name
# `remove_nan_observation` (no trailing "s"), which does not match the call below.
x_masked, y_masked, t = remove_nan_observations(xp, yp, data['tmax'].values)
# Interpolate temperature using Cressman
# NOTE(review): `hres` presumably sets the spacing of the generated grid in the
# same units as x_masked/y_masked -- confirm against the MetPy documentation.
tempx, tempy, temp = interpolate(x_masked, y_masked, t, interp_type='cressman', minimum_neighbors=3, search_radius=400000, hres=35000)
# Hide grid cells for which the interpolation produced NaN.
temp = np.ma.masked_where(np.isnan(temp), temp)
# NOTE(review): the levels run from -20 to 19, while the tmax values in the
# sample table lie between 17.7 and 26.4 -- check that this range is intended.
levels = list(range(-20, 20, 1))
cmap = plt.get_cmap('viridis')
norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)
fig = plt.figure(figsize=(15, 10))
view = fig.add_subplot(1, 1, 1, projection=to_proj)
view.add_geometries(adm1_shapes, ccrs.PlateCarree(),edgecolor='black', facecolor='white', alpha=0.5)
view.set_extent([-103.8, -103, 20.3, 21.099 ], ccrs.PlateCarree())
# Label positions (lon, lat) for the five place names drawn on the map.
ZapLon,ZapLat=-103.50,20.80
GuadLon,GuadLat=-103.33,20.68
TonaLon,TonaLat=-103.21,20.62
TlaqLon,TlaqLat=-103.34,20.59
TlajoLon,TlajoLat=-103.44,20.47
plt.text(ZapLon,ZapLat,'Zapopan',transform=ccrs.Geodetic())
plt.text(GuadLon,GuadLat,'Guadalajara',transform=ccrs.Geodetic())
plt.text(TonaLon,TonaLat,'Tonala',transform=ccrs.Geodetic())
plt.text(TlaqLon,TlaqLat,'Tlaquepaque',transform=ccrs.Geodetic())
plt.text(TlajoLon,TlajoLat,'Tlajomulco',transform=ccrs.Geodetic())
# Draw the interpolated grid and attach a colour bar using the same levels.
mmb = view.pcolormesh(tempx, tempy, temp,transform=ccrs.PlateCarree(),cmap=cmap, norm=norm)
plt.colorbar(mmb, shrink=.4, pad=0.02, boundaries=levels)
plt.show()
The problem is in the call to MetPy's interpolate function. With the setting of hres=35000, it is generating a grid spaced at 35km. However, it appears that your data points are spaced much more closely than that; together, that results in a generated grid that has only two points, as shown as the red points below (black points are the original stations with non-masked data):
The result is that it only creates two points for the grid, both of which are outside the bounds of your data points; therefore those points end up masked. If instead we set hres to something much lower, say 5km (i.e. 5000), then a much more sensible result comes out:
max min mincount maxcount
0 12 10 1 6
1 21 14 1 6
2 34 19 1 6
3 6 20 1 4
4 8 22 1 4
5 41 23 1 4
This is a pandas DataFrame.
I want a plot like this image:
enter image description here
The text labels are very important.
Here is my code:
# Assemble the four columns into one frame, then draw the two count columns as grouped bars.
# NOTE(review): `max_count`, `min_count`, `max` and `min` are not defined in the
# visible snippet; unless rebound elsewhere, `max` and `min` are the Python builtins.
df = pd.DataFrame({'maxcount': max_count, 'mincount': min_count, 'max': max, 'min': min})
ax = df[['maxcount', 'mincount']].plot(kind='bar')
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Build the example frame.
d = {
    'max': [12, 21, 34, 6, 8, 41],
    'min': [10, 14, 19, 20, 22, 23],
    'mincount': [1, 1, 1, 1, 1, 1],
    'maxcount': [6, 6, 6, 4, 4, 4],
}
df = pd.DataFrame(d)

# Two views of the same data: `counts` gives the bar heights,
# `max_min` gives the numbers written above the bars.
counts = pd.DataFrame(df, columns=['maxcount', 'mincount'])
max_min = pd.DataFrame(df, columns=['max', 'min'])

# Grouped bars: one group per row, one bar per count column.
ax = counts.plot.bar(colormap='Paired', figsize=(12, 4))

# Shared text placement for every label.
label_style = dict(va='bottom', ha='center', fontsize=10)

# Each entry: (column position, x-offset of the value label,
#              x-offset of the bracketed count label).
# Column 0 labels go left of the group centre, column 1 labels right of it.
for col, value_dx, count_dx in ((0, -0.2, -0.1), (1, 0.1, 0.2)):
    values = max_min.iloc[:, col].values
    heights = counts.iloc[:, col].values
    for row, (value, height) in enumerate(zip(values, heights)):
        # Both labels sit on top of the bar, i.e. at y == bar height.
        ax.annotate('%.d' % value, (row + value_dx, height), **label_style)
        ax.annotate('(%.d' % height + ')', (row + count_dx, height), **label_style)
This is the output:
I have a DataFrame with some time series. I created a correlation matrix from those time series and I'd like to create a hierarchical clustering on this correlation matrix. How can I do that?
#
# let's pretend this DataFrame contains some time series
# (150 standard-normal samples arranged as 10 rows x 15 columns)
#
df = pd.DataFrame((np.random.randn(150)).reshape(10,15))
0 1 2 13 14
0 0.369746 0.093882 -0.656211 .... -0.596936 0 0.095960
1 0.641457 1.120405 -0.468639 .... -2.070802 1 -1.254159
2 0.360756 -0.222554 0.367893 .... 0.566299 2 0.932898
3 0.733130 0.666270 -0.624351 .... -0.377017 3 0.340360
4 -0.263967 1.143818 0.554947 .... 0.220406 4 -0.585353
5 0.082964 -0.311667 1.323161 .... -1.190672 5 -0.828039
6 0.173685 0.719818 -0.881854 .... -1.048066 6 -1.388395
7 0.118301 -0.268945 0.909022 .... 0.094301 7 1.111376
8 -1.341381 0.599435 -0.318425 .... 1.053272 8 -0.763416
9 -1.146692 0.453125 0.150241 .... 0.454584 9 1.506249
#
# I can create a correlation matrix like this
#
correlation_matrix = df.corr(method='spearman')
0 1 ... 13 14
0 1.000000 -0.139394 ... 0.090909 0.309091
1 -0.139394 1.000000 ... -0.636364 0.115152
2 0.175758 0.733333 ... -0.515152 -0.163636
3 0.309091 0.163636 ... -0.248485 -0.127273
4 0.600000 -0.103030 ... 0.151515 0.175758
5 -0.078788 0.054545 ... -0.296970 -0.187879
6 -0.175758 -0.272727 ... 0.151515 -0.139394
7 0.163636 -0.042424 ... 0.187879 0.248485
8 0.030303 0.915152 ... -0.430303 0.296970
9 -0.696970 0.321212 ... -0.236364 -0.151515
10 0.163636 0.115152 ... -0.163636 0.381818
11 0.321212 -0.236364 ... -0.127273 -0.224242
12 -0.054545 -0.200000 ... 0.078788 0.236364
13 0.090909 -0.636364 ... 1.000000 0.381818
14 0.309091 0.115152 ... 0.381818 1.000000
Now, how can I build the hierarchical clustering on this matrix?
Here is a step-by-step guide on how to build the hierarchical clustering and dendrogram from our time series using SciPy. Note that scikit-learn (a powerful data analysis library built on top of SciPy) also has many other clustering algorithms implemented.
First we build some synthetic time series to work with. We'll build 6 groups of correlated time series and we expect the hierarchical clustering to detect those six groups.
import numpy as np
import seaborn as sns
import pandas as pd
from scipy import stats
import scipy.cluster.hierarchy as hac
import matplotlib.pyplot as plt
#
# build 6 time series groups for testing, called: a, b, c, d, e, f
#
num_samples = 61
group_size = 10
#
# create the main time series for each group
#
x = np.linspace(0, 5, num_samples)
scale = 4
a = scale * np.sin(x)
b = scale * (np.cos(1+x*3) + np.linspace(0, 1, num_samples))
c = scale * (np.sin(2+x*6) + np.linspace(0, -1, num_samples))
d = scale * (np.cos(3+x*9) + np.linspace(0, 4, num_samples))
e = scale * (np.sin(4+x*12) + np.linspace(0, -4, num_samples))
f = scale * np.cos(x)
#
# from each main series build 'group_size' series
#
fig, ax = plt.subplots()
group_frames = []
for idx, arr in enumerate([a, b, c, d, e, f]):
    # Broadcasting: the (num_samples,) base series plus per-point uniform
    # noise of shape (group_size, num_samples) plus one Gaussian offset per
    # series of shape (group_size, 1).
    arr = arr + np.random.rand(group_size, num_samples) + np.random.randn(group_size, 1)
    df = pd.DataFrame(arr)
    group_frames.append(df)
    # Plot the group with plain matplotlib: every member as a faint trace and
    # the group mean as a solid line in the same colour.  (`sns.tsplot`, used
    # here originally, is no longer available in current seaborn releases.)
    colour = 'C%d' % idx
    ax.plot(arr.T, color=colour, alpha=0.3, linewidth=0.8)
    ax.plot(arr.mean(axis=0), color=colour, linewidth=2)
# `DataFrame.append` was removed in pandas 2.0; concatenate once instead.
# As before, each group keeps its own 0..group_size-1 row labels.
timeSeries = pd.concat(group_frames)
plt.show()
Now we do the clustering and plot it:
# Single-linkage clustering of the rows, using SciPy's built-in
# 'correlation' distance between series.
Z = hac.linkage(timeSeries, method='single', metric='correlation')

# Draw the resulting tree on a wide figure.
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
leaf_style = {
    'leaf_rotation': 90.,   # rotates the x axis labels
    'leaf_font_size': 8.,   # font size for the x axis labels
}
hac.dendrogram(Z, **leaf_style)
plt.show()
If we want to choose what kind of correlation to apply, or to use another distance metric, we can provide a custom metric function:
# Here we use spearman correlation
def my_metric(x, y):
    """Return the Spearman-correlation distance between two series.

    The rank correlation ``rho`` of ``x`` and ``y`` lies in [-1, 1]; it is
    turned into a distance ``1 - rho`` in [0, 2], so perfectly co-monotonic
    series are at distance 0 and perfectly anti-monotonic ones at distance 2.
    """
    # spearmanr, not pearsonr: the built-in 'correlation' metric is already
    # Pearson-based, and this custom metric is meant to be the rank-based one.
    rho, _ = stats.spearmanr(x, y)
    return 1 - rho  # correlation to distance: range 0 to 2
# Single-linkage clustering of the rows, this time with the user-supplied
# distance function instead of a built-in metric name.
Z = hac.linkage(timeSeries, method='single', metric=my_metric)

# Draw the resulting tree on a wide figure.
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
leaf_style = {
    'leaf_rotation': 90.,   # rotates the x axis labels
    'leaf_font_size': 8.,   # font size for the x axis labels
}
hac.dendrogram(Z, **leaf_style)
plt.show()
To retrieve the Clusters we can use the fcluster function. It can be run in multiple ways (check the documentation) but in this example we'll give it as target the number of clusters we want:
from scipy.cluster.hierarchy import fcluster
def print_clusters(timeSeries, Z, k, plot=False):
    """Cut the linkage ``Z`` into at most ``k`` flat clusters and report them.

    Prints one line per cluster with its label and member count, in order
    of first appearance of each label.  With ``plot=True`` the member rows
    of ``timeSeries`` are additionally plotted, one figure per cluster.

    ``timeSeries`` is the frame whose rows were clustered, ``Z`` the linkage
    matrix built from it, and ``k`` the number of clusters to extract.
    """
    labels = pd.Series(fcluster(Z, k, criterion='maxclust'))

    # sort=False keeps the groups in first-appearance order.
    for label, members in labels.groupby(labels, sort=False):
        positions = members.index
        print("Cluster %d number of entries %d" % (label, len(positions)))
        if not plot:
            continue
        # Rows become columns after the transpose, so select by position.
        timeSeries.T.iloc[:, positions].plot()
        plt.show()
print_clusters(timeSeries, Z, 6, plot=False)
Output:
Cluster 2 number of entries 10
Cluster 5 number of entries 10
Cluster 3 number of entries 10
Cluster 6 number of entries 10
Cluster 1 number of entries 10
Cluster 4 number of entries 10