I got a 3D scatterplot which looks like "tubes", what it in fact should display. Currently every "tube" consist out of 40 markers. What I am trying is, that these 40 markes together built a cylinder, that looks like a tube with the positional arguments from X, Yand Z and the coloration from C.
X = df['Tube']
Y = df['Window']
C = df['Value']
Z = df['Depth']
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter3D(X,Y,Z, marker='o',s=50, c=C, cmap = 'Reds',edgecolors= "black")
df
Tube Window Value Depth
0 1 1 0.000383 -0.1
1 1 2 0.023253 -0.1
2 1 3 0.022623 -0.1
3 1 4 0.003599 -0.1
4 1 5 0.001281 -0.1
... ... ... ... ...
2155 54 36 0.020977 -1.2
2156 54 37 0.000000 -1.2
2157 54 38 0.007104 -1.2
2158 54 39 0.015233 -1.2
2159 54 40 0.000000 -1.2
Does anybody has any idea how this might be possible?
It seems to work with mayavi.mlap.
from mayavi.mlab import *
from mayavi import mlab
from PyQt5 import QtWidgets
X = df['Tube']
Y = df['Window']
C = df['Value']
Z = df['Depth']
plot3d(X, Y, Z, C, tube_radius=0.25, colormap='Reds')
mlab.show()
Related
Below is the data that is used to create the histogram subplot charts in ploty express graph objects.
Below code is used to create histogram subplot charts in ploty express graph objects.
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
specs = [[{'type':'histogram'}, {'type':'histogram'},{'type':'histogram'}]]
fig = make_subplots(rows=1, cols=3, specs=specs, subplot_titles=['<b> Millenials </b>',
'<b> Generation X </b>',
'<b> Boomers </b>'])
fig.add_trace(go.Histogram(
x=df[df['Generation']=='Millenials']['NumCompaniesWorked'],
opacity = 0.5,
marker_color = ['#455f66'] * 15
),1,1)
fig.add_trace(go.Histogram(
x=df[df['Generation']=='Generation X']['NumCompaniesWorked'],
opacity = 0.5,
marker_color = ['#455f66'] * 15
),1,2)
fig.add_trace(go.Histogram(
x=df[df['Generation']=='Boomers']['NumCompaniesWorked'],
opacity = 0.5,
marker_color = ['#455f66'] * 15
),1,3)
fig.update_layout(
showlegend=False,
title=dict(text="<b> Histogram - <br> <span style='color: #f55142'> How to add the box plot and mean vertical line on each diagram </span></b> ",
font=dict(
family="Arial",
size=20,
color='#283747')
))
fig.show()
And below is the output I get from the above code
How can I include the mean (Average) vertical line in a histogram diagrams as the mean values are,
Millenials = 2.2
Generation X = 3.4
Boomers = 4.1
and a box plot above all 03 histogram diagrams.
Which should look like the shown diagram below for all 03 histogram diagrams.
import pandas as pd
import numpy as np
#original df
df = pd.DataFrame({'NumCompaniesWorked':list(range(10)),
'Millenials':[139,407,54,57,55,32,35,28,17,24],
'Generation X':[53,108,83,90,70,27,32,40,26,24],
'Boomers':[5,6,9,12,14,4,3,6,6,4]})
#reorganizing df
dfs = []
for col in ['Millenials', 'Generation X', 'Boomers']:
dfs.append(df[['NumCompaniesWorked', col]].rename(columns={col:'count'}).assign(Generation=col))
df = pd.concat(dfs)
#output
NumCompaniesWorked count Generation
0 0 139 Millenials
1 1 407 Millenials
2 2 54 Millenials
3 3 57 Millenials
4 4 55 Millenials
5 5 32 Millenials
6 6 35 Millenials
7 7 28 Millenials
8 8 17 Millenials
9 9 24 Millenials
0 0 53 Generation X
1 1 108 Generation X
2 2 83 Generation X
3 3 90 Generation X
4 4 70 Generation X
5 5 27 Generation X
6 6 32 Generation X
7 7 40 Generation X
8 8 26 Generation X
9 9 24 Generation X
0 0 5 Boomers
1 1 6 Boomers
2 2 9 Boomers
3 3 12 Boomers
4 4 14 Boomers
5 5 4 Boomers
6 6 3 Boomers
7 7 6 Boomers
8 8 6 Boomers
9 9 4 Boomers
fig = px.histogram(df,
x='NumCompaniesWorked',
y='count',
marginal='box',
facet_col='Generation')
fig.add_vline(x=2.2, line_width=1, line_dash='dash', line_color='gray', col=1)
fig.add_vline(x=3.4, line_width=1, line_dash='dash', line_color='gray', col=2)
fig.add_vline(x=4.1, line_width=1, line_dash='dash', line_color='gray', col=3)
fig.show()
I'm trying to figure out how to get a custom scale for my axis. My x-axis goes from 0 to 1,000,000 in 100,000 step increments, but I want to scale each of these numbers by 1/100, so that they go from 0 to 1,000 in 100 step increments. matplotlib.scale.FuncScale, but I'm having trouble getting it to work.
Here's what the plot currently looks like:
My code looks like this:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
dataPlot = pd.DataFrame({"plot1" : [1, 2, 3], "plot2" : [4, 5, 6], "plot3" : [7, 8, 9]})
ax = sns.lineplot(data = dataPlot, dashes = False, palette = ["blue", "red", "green"])
ax.set_xlim(1, numRows)
ax.set_xticks(range(0, numRows, 100000))
plt.ticklabel_format(style='plain')
plt.scale.FuncScale("xaxis", ((lambda x : x / 1000), (lambda y : y * 1000)))
When I run this code specifically, I get AttributeError: module 'matplotlib.pyplot' has no attribute 'scale', so I tried adding import matplotlib as mpl to the top of the code and then changing the last line to be mpl.scale.FuncScale("xaxis", ((lambda x : x / 1000), (lambda y : y * 1000))) and that actually ran without error, but but it didn't change anything.
How can I get this to properly scale the axis?
Based on the clarification from the question comments a straightforward solution scaling the x-axis data in the dataframe (x-data in the question case being the df index) and then plot.
Using example data since the code from the question wasn't running on its own.
x starting range is 0 to 100, and then scaled to 0 to 10, but that's equivalent to any other starting range and scaling.
1st the default df.plot: (just as reference)
import pandas as pd
import numpy as np
arr = np.arange(0, 101, 1) * 1.5
df = pd.DataFrame(arr, columns=['y_data'])
print(df)
y_data
0 0.0
1 1.5
2 3.0
3 4.5
4 6.0
.. ...
96 144.0
97 145.5
98 147.0
99 148.5
100 150.0
df.plot()
Note that per default df.plot uses the index as x-axis.
2nd scaling the x-data in the dataframe:
The interims dfs are only displayed to follow along.
Preparation
df.reset_index(inplace=True)
Getting the original index data as a column to further work with (see scaling below).
index y_data
0 0 0.0
1 1 1.5
2 2 3.0
3 3 4.5
4 4 6.0
.. ... ...
96 96 144.0
97 97 145.5
98 98 147.0
99 99 148.5
100 100 150.0
df = df.rename(columns = {'index':'x_data'}) # just to be more explicit
x_data y_data
0 0 0.0
1 1 1.5
2 2 3.0
3 3 4.5
4 4 6.0
.. ... ...
96 96 144.0
97 97 145.5
98 98 147.0
99 99 148.5
100 100 150.0
Scaling
df['x_data'] = df['x_data'].apply(lambda x: x/10)
x_data y_data
0 0.0 0.0
1 0.1 1.5
2 0.2 3.0
3 0.3 4.5
4 0.4 6.0
.. ... ...
96 9.6 144.0
97 9.7 145.5
98 9.8 147.0
99 9.9 148.5
100 10.0 150.0
3rd df.plot with specific columns:
df.plot(x='x_data', y = 'y_data')
By x= a specific column instead of the default = index is used as the x-axis.
Note that the y data hasn't changed but the x-axis is now scaled compared to the "1st the default df.plot" above.
I'd like to make this type of plot with multiple columns separated by small whitespace, each having different category having 3-5 (5 in this example) different observations with varying values on y axis:
actually, i can plot this plot use ggplot2. for example:
head(mtcars)
# mpg cyl disp hp drat wt qsec vs am gear carb
# Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
# Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
# Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
# Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
library(dplyr)
library(ggplot2)
mtcars %>% reshape2::melt() %>%
ggplot(aes(x = variable, y = value)) +
geom_point() + facet_grid(~ variable) +
theme(axis.text.x = element_blank())
you set a categorical variable in your dataset,then use the facet_grid(~).this function can change your plot into multiple plot by your categrical variable
Here is an approach to draw a similar plot using Python's matplotlib. The plot has a grey background and white major and minor gridlines to delimit the zones. Getting the dots in the center of each little cell is somewhat tricky: divide into n+1 spaces and shift half a cell (1/2n). A secondary x-axis can be used to set the labels. A zorder has to be set to have the dots on top of the gridlines.
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import ticker
n = 5
cols = 7
values = [np.random.uniform(1, 10, n) for c in range(cols)]
fig, ax = plt.subplots()
ax.set_facecolor('lightgrey')
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(1 / (n)))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
ax.grid(True, which='both', axis='both', color='white')
ax.set_xticklabels([])
ax.tick_params(axis='x', which='both', length=0)
ax.grid(which='major', axis='both', lw=3)
ax.set_xlim(1, cols + 1)
for i in range(1, cols + 1):
ax.scatter(np.linspace(i, i + 1, n, endpoint=False) + 1 / (2 * n), values[i-1], c='crimson', zorder=2)
ax2 = ax.twiny()
ax2.set_xlim(0.5, cols + 0.5)
ticks = range(1, cols + 1)
ax2.set_xticks(ticks)
ax2.set_xticklabels([f'Cat_{t:02d}' for t in ticks])
bbox = dict(boxstyle="round", ec="limegreen", fc="limegreen", alpha=0.5)
plt.setp(ax2.get_xticklabels(), bbox=bbox)
ax2.tick_params(axis='x', length=0)
plt.show()
I need to compare different sets of daily data between 4 shifts(categorical / groupby), using bar graphs and line graphs. I have looked everywhere and have not found a working solution for this that doesn't include generating new pivots and such.
I've used both, matplotlib and seaborn, and while I can do one or the other(different colored bars/lines for each shift), once I incorporate the other, either one disappears, or other anomalies happen like only one plot point shows. I have looked all over and there are solutions for representing a single series of data on both chart types, but none that goes into multi category or grouped for both.
Data Example:
report_date wh_id shift Head_Count UTL_R
3/17/19 55 A 72 25%
3/18/19 55 A 71 10%
3/19/19 55 A 76 20%
3/20/19 55 A 59 33%
3/21/19 55 A 65 10%
3/22/19 55 A 54 20%
3/23/19 55 A 66 14%
3/17/19 55 1 11 10%
3/17/19 55 2 27 13%
3/17/19 55 3 18 25%
3/18/19 55 1 23 100%
3/18/19 55 2 16 25%
3/18/19 55 3 12 50%
3/19/19 55 1 28 10%
3/19/19 55 2 23 50%
3/19/19 55 3 14 33%
3/20/19 55 1 29 25%
3/20/19 55 2 29 25%
3/20/19 55 3 10 50%
3/21/19 55 1 17 20%
3/21/19 55 2 29 14%
3/21/19 55 3 30 17%
3/22/19 55 1 12 14%
3/22/19 55 2 10 100%
3/22/19 55 3 17 14%
3/23/19 55 1 16 10%
3/23/19 55 2 11 100%
3/23/19 55 3 13 10%
tm_daily_df = pd.read_csv('fg_TM_Daily.csv')
tm_daily_df = tm_daily_df.set_index('report_date')
fig2, ax2 = plt.subplots(figsize=(12,8))
ax3 = ax2.twinx()
group_obj = tm_daily_df.groupby('shift')
g = group_obj['Head_Count'].plot(kind='bar', x='report_date', y='Head_Count',ax=ax2,stacked=False,alpha = .2)
g = group_obj['UTL_R'].plot(kind='line',x='report_date', y='UTL_R', ax=ax3,marker='d', markersize=12)
plt.legend(tm_daily_df['shift'].unique())
This code has gotten me the closest I've been able to get. Notice that even with stacked = False, they are still stacked. I changed the setting to True, and nothing changes.
All i need is for the bars to be next to each other with the same color scheme representative of the shift
The graph:
Here are two solutions (stacked and unstacked). Based on your questions we will:
plot Head_Count in the left y axis and UTL_R in the right y axis.
report_date will be our x axis
shift will represent the hue of our graph.
The stacked version uses pandas default plotting feature, and the unstacked version uses seaborn.
EDIT
From your request, I added a 100% stacked graph. While it is not quite exactly what you asked in the comment, the graph type you asked may create some confusion when reading (are the values based on the upper line of the stack or the width of the stack). An alternative solution may be using a 100% stacked graph.
Stacked
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
dfg = df.set_index(['report_date', 'shift']).sort_index(level=[0,1])
fig, ax = plt.subplots(figsize=(12,6))
ax2 = ax.twinx()
dfg['Head_Count'].unstack().plot.bar(stacked=True, ax=ax, alpha=0.6)
dfg['UTL_R'].unstack().plot(kind='line', ax=ax2, marker='o', legend=None)
ax.set_title('My Graph')
plt.show()
Stacked 100%
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
dfg = df.set_index(['report_date', 'shift']).sort_index(level=[0,1])
# Create `Head_Count_Pct` column
for date in dfg.index.get_level_values('report_date').unique():
for shift in dfg.loc[date, :].index.get_level_values('shift').unique():
dfg.loc[(date, shift), 'Head_Count_Pct'] = dfg.loc[(date, shift), 'Head_Count'].sum() / dfg.loc[(date, 'A'), 'Head_Count'].sum()
fig, ax = plt.subplots(figsize=(12,6))
ax2 = ax.twinx()
pal = sns.color_palette("Set1")
dfg[dfg.index.get_level_values('shift').isin(['1','2','3'])]['Head_Count_Pct'].unstack().plot.bar(stacked=True, ax=ax, alpha=0.5, color=pal)
dfg['UTL_R'].unstack().plot(kind='line', ax=ax2, marker='o', legend=None, color=pal)
ax.set_title('My Graph')
plt.show()
Unstacked
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
dfg = df.set_index(['report_date', 'shift']).sort_index(level=[0,1])
fig, ax = plt.subplots(figsize=(15,6))
ax2 = ax.twinx()
sns.barplot(x=dfg.index.get_level_values('report_date'),
y=dfg.Head_Count,
hue=dfg.index.get_level_values('shift'), ax=ax, alpha=0.7)
sns.lineplot(x=dfg.index.get_level_values('report_date'),
y=dfg.UTL_R,
hue=dfg.index.get_level_values('shift'), ax=ax2, marker='o', legend=None)
ax.set_title('My Graph')
plt.show()
EDIT #2
Here is the graph as you requested in a second time (stacked, but stack n+1 does not start where stack n ends).
It is slightly more involving as we have to do multiple things:
- we need to manually assign our color to our shift in our df
- once we have our colors assign, we will iterate through each date range and 1) sort or Head_Count values descending (so that our largest sack is in the back when we plot the graph), and 2) plot the data and assign the color to each stacj
- Then we can create our second y axis and plot our UTL_R values
- Then we need to assign the correct color to our legend labels
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def assignColor(shift):
if shift == 'A':
return 'R'
if shift == '1':
return 'B'
if shift == '2':
return 'G'
if shift == '3':
return 'Y'
# map a color to a shift
df['color'] = df['shift'].apply(assignColor)
fig, ax = plt.subplots(figsize=(15,6))
# plot our Head_Count values
for date in df.report_date.unique():
d = df[df.report_date == date].sort_values(by='Head_Count', ascending=False)
y = d.Head_Count.values
x = date
color = d.color
b = plt.bar(x,y, color=color)
# Plot our UTL_R values
ax2 = ax.twinx()
sns.lineplot(x=df.report_date, y=df.UTL_R, hue=df['shift'], marker='o', legend=None)
# Assign the color label color to our legend
leg = ax.legend(labels=df['shift'].unique(), loc=1)
legend_maping = dict()
for shift in df['shift'].unique():
legend_maping[shift] = df[df['shift'] == shift].color.unique()[0]
i = 0
for leg_lab in leg.texts:
leg.legendHandles[i].set_color(legend_maping[leg_lab.get_text()])
i += 1
How about this?
tm_daily_df['UTL_R'] = tm_daily_df['UTL_R'].str.replace('%', '').astype('float') / 100
pivoted = tm_daily_df.pivot_table(values=['Head_Count', 'UTL_R'],
index='report_date',
columns='shift')
pivoted
# Head_Count UTL_R
# shift 1 2 3 A 1 2 3 A
# report_date
# 3/17/19 11 27 18 72 0.10 0.13 0.25 0.25
# 3/18/19 23 16 12 71 1.00 0.25 0.50 0.10
# 3/19/19 28 23 14 76 0.10 0.50 0.33 0.20
# 3/20/19 29 29 10 59 0.25 0.25 0.50 0.33
# 3/21/19 17 29 30 65 0.20 0.14 0.17 0.10
# 3/22/19 12 10 17 54 0.14 1.00 0.14 0.20
# 3/23/19 16 11 13 66 0.10 1.00 0.10 0.14
fig, ax = plt.subplots()
pivoted['Head_Count'].plot.bar(ax=ax)
pivoted['UTL_R'].plot.line(ax=ax, legend=False, secondary_y=True, marker='D')
ax.legend(loc='upper left', title='shift')
EDIT 2
I fixed one part of the code that was wrong, With that line of code, I add the category for every information (Axis X).
y = joy(cat, EveryTest[i].GPS)
After adding that line of code, the graph improved, but something is still failing. The graph starts with the 4th category (I mean 12:40:00), and it must start in the first (12:10:00), What I am doing wrong?
EDIT 1:
I Updated Bkoeh to 0.12.13, then the label problem was fixed.
Now my problem is:
I suppose the loop for (for i, cat in enumerate(reversed(cats)):) put every chart on the label, but do not happen that. I see the chart stuck in the 5th o 6th label. (12:30:00 or 12:50:00)
- Start of question -
I am trying to reproduce the example of joyplot. But I have trouble when I want to lot my own data. I dont want to plot an histogram, I want to plot some list in X and some list in Y. But I do not understand what I am doing wrong.
the code (Fixed):
from numpy import linspace
from scipy.stats.kde import gaussian_kde
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, FixedTicker, PrintfTickFormatter
from bokeh.plotting import figure
#from bokeh.sampledata.perceptions import probly
bokeh.BOKEH_RESOURCES='inline'
import colorcet as cc
output_file("joyplot.html")
def joy(category, data, scale=20):
return list(zip([category]*len(data),data))
#Elements = 7
cats = ListOfTime # list(reversed(probly.keys())) #list(['Pos_1','Pos_2']) #
print len(cats),' lengh of times'
palette = [cc.rainbow[i*15] for i in range(16)]
palette += palette
print len(palette),'lengh palette'
x = X # linspace(-20,110, 500) #Test.X #
print len(x),' lengh X'
source = ColumnDataSource(data=dict(x=x))
p = figure(y_range=cats, plot_width=900, x_range=(0, 1500), toolbar_location=None)
for i, cat in enumerate(reversed(cats)):
y = joy(cat, EveryTest[i].GPS)
#print cat
source.add(y, cat)
p.patch('x', cat, color=palette[i], alpha=0.6, line_color="black", source=source)
#break
print source
p.outline_line_color = None
p.background_fill_color = "#efefef"
p.xaxis.ticker = FixedTicker(ticks=list(range(0, 1500, 100)))
#p.xaxis.formatter = PrintfTickFormatter(format="%d%%")
p.ygrid.grid_line_color = None
p.xgrid.grid_line_color = "#dddddd"
p.xgrid.ticker = p.xaxis[0].ticker
p.axis.minor_tick_line_color = None
p.axis.major_tick_line_color = None
p.axis.axis_line_color = None
#p.y_range.range_padding = 0.12
#p
show(p)
the variables are:
print X, type(X)
[ 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 48 51 54 57 60 63 66 69 72 75
78 81 84 87 90 93 96 99] <type 'numpy.ndarray'>
and
print EveryTest[0].GPS, type(EveryTest[i].GPS)
0 2
1 2
2 2
3 2
4 2
5 2
6 2
7 2
8 2
9 2
10 2
11 2
12 2
13 2
14 2
15 2
16 2
17 2
18 2
19 2
20 2
21 2
22 2
23 2
24 2
25 2
26 2
27 2
28 2
29 2
30 2
31 2
32 2
Name: GPS, dtype: int64 <class 'pandas.core.series.Series'>
Following the example, the type of data its ok. But I get the next image:
And I expected something like this: