Python 3D plot for multiple dataframes - python

Assuming that I have three Python pandas DataFrames:
df_sale = pd.DataFrame([[20,30,10], [30,20,20], [20,40,40]], columns=list("ABC"))
A B C
0 20 30 10
1 30 20 20
2 20 40 40
df_people = pd.DataFrame([[2,3,1], [3,2,2], [2,4,4]], columns=list("ABC"))
A B C
0 2 3 1
1 3 2 2
2 2 4 4
df_department = pd.DataFrame([[1,2,1], [1,1,2], [2,1,1]], columns=list("ABC"))
A B C
0 1 2 1
1 1 1 2
2 2 1 1
How do I plot a 3D bar chart with all these 3 dataframes in the same place?
I want the X axis to be ['A', 'B', 'C'], Y axis to be the name of dataframes ['df_sale', 'df_people', 'df_department'], and Z axis to show the numbers.

You could use matplotlib's 3D bars.
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
columns = ['A', 'B', 'C']
df_names = ['sale', 'people', 'department']
df = [pd.DataFrame([[20,30,10], [30,20,20], [20,40,40]], columns=columns), pd.DataFrame([[2,3,1], [3,2,2], [2,4,4]], columns=columns), pd.DataFrame([[1,2,1], [1,1,2], [2,1,1]], columns=columns)]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
#make sure x and y axis get the right tick labels
plt.xticks([i for i in range(len(columns))], columns)
plt.yticks([i for i in range(len(df_names))], df_names)
#define a list for x positions
xs = list()
for i in range(len(df)):
for j in range(len(columns)):
xs.append(i + j * 0.1)
for c1, c in enumerate(['r', 'g', 'b']):
ys = list()
for i in range(len(columns)):
ys.extend(df[c1].ix[:,i:i+1].unstack().tolist())
cs = [c] * len(xs)
ax.bar(xs, ys, zs=c1, zdir='y', color=cs, alpha=0.5, width=0.1)
plt.show()
Multicolors and legend
import matplotlib
colors = ['r', 'g', 'b', 'c', 'm', 'y', '#eeefff', '#feefff', '#aeefff']
for c1 in range(3):
ys = list()
for i in range(len(columns)):
ys.extend(df[c1].ix[:,i:i+1].unstack().tolist())
ax.bar(xs, ys, zs=c1, zdir='y', color=colors, alpha=0.5, width=0.1)
legend = list()
for i, c in enumerate(colors):
legend.append(matplotlib.patches.Patch(color=c, label='value {0} of column {1}'.format(i % 3, columns[i // 3])))
plt.legend(handles=legend, loc=4, bbox_to_anchor=(.9, 0), mode="expand")
plt.show()

Related

Why I am getting different plots for the same data file in python?

txt file
1 1 1
1 2 2
2 4 9
4 5 2
6 1 1
6 2 8
6 4 9
6 5 2
2 5 3
4 2 5
4 3 8
4 4 5
1 3 4
1 5 2
2 1 1
2 3 4
I got Fig 1 using the following code
import numpy as np
import matplotlib.pyplot as plt
data = np.genfromtxt('file1.txt', delimiter=' ')
lats = data[:,0]
lons = data[:,1] values = data[:,2]
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)
xre, yre = np.meshgrid(lon_uniq, lat_uniq)
zre = np.full(xre.shape, np.nan)
zre[lat_idx, lon_idx] = values
print(zre)
fig, (ax1) = plt.subplots(1,1, figsize = (10, 5))
cp1 = ax1.contourf(xre, yre, zre, levels=4)
plt.colorbar(cp1, ax=ax1)
ax1.set_title("data are not interpolated") plt.show()
and I got fig2 by using the below code and same data file
import numpy as np
import matplotlib.pyplot as plt
data = np.genfromtxt('test.txt', delimiter=' ')
lats = data[:, 0]
lons = data[:, 1]
values = data[:, 2]
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)
xre, yre = np.meshgrid(lon_uniq, lat_uniq)
# zre = np.full(xre.shape, np.nan)
zre = np.full(xre.shape, 0)
zre[lat_idx, lon_idx] = values
print(zre)
fig, (ax1) = plt.subplots(1, 1, figsize=(10, 5))
cp1 = ax1.contourf(xre, yre, zre, levels=4)
plt.colorbar(cp1, ax=ax1)
ax1.set_title("data are not interpolated")
plt.show()
Which of the code and contour plot is correct? The first, second and third column in txt file represents x, y and z values, and I want to create a contour plot using above data file. Thanks.

Make multiple barplot automatically from a dataframe

I have a dataframe where I have a variable 'Gender' (0 or 1) indicating if one is Male or Female, and another variable 'Dis' which says the state of the Disease (0,1,2 or 3).
> df.head()
Gender Dis
0 1 2
1 0 0
2 0 1
3 1 3
4 0 0
5 0 1
I want to make a barplot with the count values for each one of the'Dis' values but I want it to be separated by Gender, i.e, I want two bars for each one of the states of the disease. I want this:
However, I can't do this barplot automatically without manually writing the count values of each one. I had to check the count values for each one of the combinations aside. I produced this plot manually with the following:
X = ['0','1','2','3']
M = [43,9,20,11]
F = [118,21,168,20]
X_axis = np.arange(len(X))
plt.bar(X_axis - 0.2, M, 0.4, label = 'Male')
plt.bar(X_axis + 0.2, F, 0.4, label = 'Female')
plt.xticks(X_axis, X)
plt.xlabel("")
plt.ylabel("")
plt.legend()
plt.title("title")
def autolabel(rects):
for rect in rects:
h = rect.get_height()
ax.text(rect.get_x()+rect.get_width()/2., 1.05*h, '%d'%int(h),
ha='center', va='bottom')
plt.show()
Can I do something more "automatic" directly from the dataframe? Also, can I also display the count values on top of each bar?
Let's try with crosstab + DataFrame.plot:
plot_df = (
pd.crosstab(df['Dis'], df['Gender'])
.rename(columns={0: 'Male', 1: 'Female'})
)
ax = plot_df.plot(kind='bar', rot=0, xlabel='', ylabel='', title='title')
plt.show()
crosstab will produce the counts for Male/Female per Dis.
rename is used to turn the column names 0/1 to Male/Female:
plot_df:
Gender Male Female
Dis
0 119 128
1 140 121
2 124 120
3 112 136
Moving legend, and values on top of bars:
ax = plot_df.plot(kind='bar', rot=0, xlabel='', ylabel='', title='title')
for container in ax.containers:
ax.bar_label(container)
plt.legend(title='Gender', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
To add percentages to the top of the columns:
divide plot_df by the column totals
format as desired
zip with containers to add bar labels
plot_df = (
pd.crosstab(df['Dis'], df['Gender'])
.rename(columns={0: 'Male', 1: 'Female'})
)
# Calculate Percentages and format
labels_df = (
plot_df.div(plot_df.sum(axis=0)).mul(100).applymap('{:.2f}%'.format)
)
ax = plot_df.plot(kind='bar', rot=0, figsize=(9, 6), width=0.8,
xlabel='', ylabel='', title='title')
for container, col in zip(ax.containers, labels_df):
ax.bar_label(container, labels=labels_df[col])
plt.legend(title='Gender', bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()
plt.show()
labels_df:
Gender Male Female
Dis
0 24.04% 25.35%
1 28.28% 23.96%
2 25.05% 23.76%
3 22.63% 26.93%
Sample Data and imports used:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
np.random.seed(5)
df = pd.DataFrame({'Gender': np.random.choice([0, 1], 1000),
'Dis': np.random.choice([0, 1, 2, 3], 1000)})
If you want to do this with a for loop:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# assign data of lists.
data = {'Gender': [1,0,0,1,0,0,1,1], 'Dis': [2,0,1,3,0,1,0,1]}
# Create DataFrame
df = pd.DataFrame(data)
# Print the output.
print(df)
Then you create empty variables:
number_males_dis_0 = 0
number_females_dis_0 = 0
number_males_dis_1 = 0
number_females_dis_1 = 0
number_males_dis_2 = 0
number_females_dis_2 = 0
number_males_dis_3 = 0
number_females_dis_3 = 0
for i in range(0,len(data['Dis'])):
#print(i)
#dis = 0
if data['Dis'][i] == 0 and data['Gender'][i] == 0:
number_males_dis_0 += 1
elif data['Dis'][i] == 0 and data['Gender'][i] == 1:
number_females_dis_0 += 1
#dis = 1
elif data['Dis'][i] == 1 and data['Gender'][i] == 0:
number_males_dis_1 += 1
elif data['Dis'][i] == 1 and data['Gender'][i] == 1:
number_females_dis_1 += 1
#dis = 2
elif data['Dis'][i] == 2 and data['Gender'][i] == 0:
number_males_dis_2 += 1
elif data['Dis'][i] == 2 and data['Gender'][i] == 1:
number_females_dis_2 += 1
#dis = 3
elif data['Dis'][i] == 3 and data['Gender'][i] == 0:
number_males_dis_3 += 1
elif data['Dis'][i] == 3 and data['Gender'][i] == 1:
number_females_dis_3 += 1
Then the plot:
X = ['0','1','2','3']
M = [number_males_dis_0,number_males_dis_1,number_males_dis_2,number_males_dis_3]
F = [number_females_dis_0,number_females_dis_1,number_females_dis_2,number_females_dis_3]
X_axis = np.arange(len(X))
plt.bar(X_axis - 0.2, M, 0.4, label = 'Male')
plt.bar(X_axis + 0.2, F, 0.4, label = 'Female')
plt.xticks(X_axis, X)
plt.xlabel("")
plt.ylabel("")
plt.ylim(0,max([max(F),max(M)])+0.5)
plt.legend()
plt.title("title")
# Text on the top of each bar
for i in range(0,4):
plt.text(x = i - 0.25 , y = M[i] + 0.05, s = M[i], size = 10)
plt.text(x = i + 0.15 , y = F[i] + 0.05, s = F[i], size = 10)
plt.show()
Result:
Result

Scatter Pie Plot Python Pandas

"Scatter Pie Plot" ( a scatter plot using pie charts instead of dots). I require this as I have to represent 3 dimensions of data.
1: x axis (0-6)
2: y axis (0-6)
3: Category lets say (A,B,C - H)
If two x and y values are the same I want a pie chart to be in that position representing that Category.
Similar to the graph seen in this link:
https://matplotlib.org/gallery/lines_bars_and_markers/scatter_piecharts.html#sphx-glr-gallery-lines-bars-and-markers-scatter-piecharts-py
or this image from Tableu:
[![enter image description here][1]][1]
As I am limited to only use python I have been struggling to manipulate the code to work for me.
Could anyone help me with this problem? I would very grateful!
Example data:
XVAL YVAL GROUP
1.3 4.5 A
1.3 4.5 B
4 2 E
4 6 A
2 4 A
2 4 B
1 1 G
1 1 C
1 2 B
1 2 D
3.99 4.56 G
The final output should have 6 pie charts on the X & Y with 1 containing 3 groups and 2 containing 3 groups.
My attempt:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def draw_pie(dist,
xpos,
ypos,
size,
ax=None):
if ax is None:
fig, ax = plt.subplots(figsize=(10,8))
# for incremental pie slices
cumsum = np.cumsum(dist)
cumsum = cumsum/ cumsum[-1]
pie = [0] + cumsum.tolist()
for r1, r2 in zip(pie[:-1], pie[1:]):
angles = np.linspace(2 * np.pi * r1, 2 * np.pi * r2)
x = [0] + np.cos(angles).tolist()
y = [0] + np.sin(angles).tolist()
xy = np.column_stack([x, y])
ax.scatter([xpos], [ypos], marker=xy, s=size)
return ax
fig, ax = plt.subplots(figsize=(40,40))
draw_pie([Group],'xval','yval',10000,ax=ax)
draw_pie([Group], 'xval', 'yval', 20000, ax=ax)
draw_pie([Group], 'xval', 'yval', 30000, ax=ax)
plt.show()
I'm not sure how to get 6 pie charts. If we group on XVAL and YVAL, there are 7 unique pairs. You can do something down this line:
fig, ax = plt.subplots(figsize=(40,40))
for (x,y), d in df.groupby(['XVAL','YVAL']):
dist = d['GROUP'].value_counts()
draw_pie(dist, x, y, 10000*len(d), ax=ax)
plt.show()
Output:

Convert X and Y arrays into a frequencies grid

I would like to convert two arrays (x and y) into a frequency n x n matrix (n = 5), indicating each cell the number of point that contains. It consists on resampling both variables into five intervals and count the existing number of points per cell.
I have tried using pandas pivot_table but don't know the way of referencing to each axis coordinate.
X and Y arrays are two dependent variables that contain values between 0 and 100.
I would really appreciate some one's aid.
Thank you very much in advance.
This is an example of the code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Arrays example. They are always float type and ranging 0-100. (n_size array = 15)
x = 100 * np.random.random(15)
y = 100 * np.random.random(15)
# Df created for trying to pivot and counting values per cell
df = pd.DataFrame({'X':x,'Y':y})
# Plot the example data:
df.plot(x = 'X',y = 'Y', style = 'o')
This is what I have:
This is the objetive matrix, saved as a df:
If you do not explicitly need to use pandas (which you don't, if it's just about a frequency matrix), consider using numpy.histogram2d:
# Sample data
x = 100*np.random.random(15)
y = 100*np.random.random(15)
Construct your bins (since your x and y bins are the same, one set is enough)
bins = np.linspace(0, 100, 5+1)
# bins = array([ 0., 20., 40., 60., 80., 100.])
Now use the histogram function:
binned, binx, biny = np.histogram2d(x, y, bins = [bins, bins])
# To get the result you desire, transpose
objmat = binned.T
Note: x-values are binned along the first dimension(axis 0), which visually means 'vertical'. Hence the transpose.
Plotting:
fig, ax = plt.subplots()
ax.grid()
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
ax.scatter(x, y)
for i in range(objmat.shape[0]):
for j in range(objmat.shape[1]):
c = int(objmat[::-1][j,i])
ax.text((bins[i]+bins[i+1])/2, (bins[j]+bins[j+1])/2, str(c), fontdict={'fontsize' : 16, 'ha' : 'center', 'va' : 'center'})
Result:
You could use GroupBy.size
matching group axes to the center of each grid.
Then you can use Axes.text to draw them
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(20)
max_val = 100
n = 5
len_group = max_val // 5
x = max_val * np.random.random(15)
y = max_val * np.random.random(15)
# Df created for trying to pivot and counting values per cell
df = pd.DataFrame({'X':x,'Y':y})
x_groups = df['X'] // len_group * len_group + len_group / 2
y_groups = df['Y'] // len_group * len_group + len_group / 2
fig, ax= plt.subplots(figsize=(13, 6))
ax.set_ylim(0, max_val)
ax.set_xlim(0, max_val)
df.plot(x = 'X',y = 'Y', style = 'o', ax=ax)
for i, val in df.groupby([x_groups, y_groups]).size().items():
ax.text(*i, val,fontdict={'fontsize' : 20, 'ha' : 'center', 'va':'center'})
plt.grid()
You can just create bins with pd.cut and then groupby the bins and unstack along the X variable and you have a matrix of frequency counts.
df['Xc'] = pd.cut(df['X'], range(0, 101, 20))
df['Yc'] = pd.cut(df['Y'], range(0, 101, 20))
mat = df.groupby(['Xc', 'Yc']).size().unstack('Xc')
mat
Xc (0, 20] (20, 40] (40, 60] (60, 80] (80, 100]
Yc
(0, 20] 0 1 1 0 0
(20, 40] 4 0 1 2 0
(40, 60] 0 0 0 0 0
(60, 80] 3 0 1 0 0
(80, 100] 1 0 1 0 0
There is no elegant solution to the plotting part of the problem. But here's what you can do.
# Calculate the counts
counts = df.groupby([df.X.astype(int) // 20,
df.Y.astype(int) // 20]).size().astype(str)
# Restore the original scales
counts.index = pd.MultiIndex.from_tuples([(x * 20 + 10,
y * 20 + 10)
for x,y in counts.index.to_list()],
names=counts.index.names)
fig = plt.figure()
ax = fig.add_subplot(111)
# Plot the text labels
[ax.text(*xy, txt) for (xy, txt) in counts.items()]
# Update the axes extents
ax.axis([0, counts.index.levels[0].max() + 10,
0, counts.index.levels[1].max() + 10])
plt.show()
import pandas as pd
import numpy as np
import seaborn as sns
sns.set_style("whitegrid")
# Arrays example. They are always float type and ranging 0-100. (n_size array = 15)
x = 100 * np.random.random(15)
y = 100 * np.random.random(15)
# Df created for trying to pivot and counting values per cell
df = pd.DataFrame({'X':x,'Y':y})
ir = pd.interval_range(start=0, freq=20, end=100, closed='left')
df['xbin'] = pd.cut(df['X'], bins=ir)
df['ybin'] = pd.cut(df['Y'], bins=ir)
df['xbin'] = df['xbin'].apply(lambda x: x.mid)
df['ybin'] = df['ybin'].apply(lambda x: x.mid)
fig, ax= plt.subplots()
ax.set_ylim(0, 100)
ax.set_xlim(0, 100)
for i, val in df.groupby(['xbin', 'ybin']).size().items():
if val!=0:
ax.text(*i, val,fontdict={'fontsize' : 20, 'ha' : 'center', 'va' : 'center'})
One option is to call np.add.at on ravel of frequency matrix
x = 100 * np.random.random(15)
y = 100 * np.random.random(15)
n = 5
points = (np.array([x, y]) / 20).astype(int)
z = np.zeros((n, n), dtype=int)
np.add.at(z.ravel(),
np.ravel_multi_index(points, z.shape),
np.ones(points.shape[1]))
Sample run:
print(points)
print(z)
[[0 0 0 2 4 1 2 1 1 0 1 1 3 0 0]
[0 0 1 4 0 4 1 0 1 3 3 1 0 0 3]]
[[3 1 0 2 0]
[1 2 0 1 1]
[0 1 0 0 1]
[1 0 0 0 0]
[1 0 0 0 0]]

How to apply multivariate normal pdf function in Python with my own data

I use the following code to produce random data and plot the distribution of probability densities. How can I do the same with my own data as shown below?
Code
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["X", "Y"])
x, y = np.random.multivariate_normal(mean, cov, 1000).T
g = sns.jointplot(x=x, y=y, data=df, kind="kde", n_levels=75, color="m")
g.plot_joint(plt.scatter, c="black", s=30, linewidth=1, marker="+")
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("X", "Y");
My own data sample
X Y
0 1 8
1 7 8
2 7 9
3 5 8
4 7 7
5 9 9
6 1 3
4 6 8
5 9 7
6 9 6
7 8 2
8 1 9
9 0 10
10 22 2
11 4 45
12 9 8
I tried this but I am getting strange values
import numpy as np
mean = np.mean(data1['X'], axis=0)
cov = np.cov(data1['Y'], rowvar=0)
X = multivariate_normal.pdf(data1['X'], mean=2.5, cov=0.5)
mean = np.mean(data1['X'], axis=0)
cov = np.cov(data1['Y'], rowvar=0)
Y = multivariate_normal.pdf(data1['Y'], mean=2.5, cov=0.5)
df = np.concatenate( (X.reshape(-1,1),Y.reshape(-1,1)) , axis=1)
df = pd.DataFrame(df)
df = df.rename({0: 'X', 1: 'Y'}, axis=1)
g = sns.jointplot(x=X, y=Y, data=df, kind="kde", n_levels=75, color="r")
g.plot_joint(plt.scatter, c="black", s=30, linewidth=1, marker="+")
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("X", "Y");
This solution worked.
import numpy as np
mean = np.mean(data1['X'], axis=0)
cov = np.cov(data1['Y'], rowvar=0)
X = multivariate_normal.pdf(data1['X'], mean=2.5, cov=0.5)
mean = np.mean(data1['X'], axis=0)
cov = np.cov(data1['Y'], rowvar=0)
Y = multivariate_normal.pdf(data1['Y'], mean=2.5, cov=0.5)
df = np.concatenate( (X.reshape(-1,1),Y.reshape(-1,1)) , axis=1)
df = pd.DataFrame(df)
df = df.rename({0: 'X', 1: 'Y'}, axis=1)
g = sns.jointplot(x=X, y=Y, data=df, kind="kde", n_levels=75, color="r")
g.plot_joint(plt.scatter, c="black", s=30, linewidth=1, marker="+")
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("X", "Y");

Categories