I am trying to write a define function to plot a line graph by the data of a imported a csv file.
This a small sample of my data( temperature reading for every minutes):-
00:01:00.0305040, 35.35985
00:02:00.0438094, 35.48547
00:03:00.0571148, 35.65295
00:04:00.0704203, 35.90417
00:05:00.0837257, 36.23914
.
.
.
.
08:52:07.2370729, 74.92772
08:53:07.2503783, 75.01146
08:54:07.2648837, 75.05333
08:55:07.2781891, 75.0952
08:56:07.2914945, 75.0952
When I try to set the x ticker to be appear every hour, they do not show up in the plotted graph.
This is my code
df = pd.read_csv(file,names=["time", "temp"])
df["time"]=pd.to_datetime(df["time"])
df=df.set_index('time')
df.index = df.index.map (lambda t: t.strftime('%H:%M'))
print(df)
fig, ax = plt.subplots()
df.plot(ax = ax, color = 'black', linewidth = 0.4, x_compat=True)
ax.set(xlabel='Time (Hour:Minutes)', ylabel='Temperature (Celsius)')
ax.xaxis.set_major_locator(mdates.HourLocator(interval = 1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate()
return plt.show()
I have tried labeling the x tickers manually
plt.xticks(['0:00', '1:00', '2:00', '3:00', '4:00:0', '5:00', '6:00:0', '7:00', '8:00', '9:00', '10:00'])
and it worked, but it there a way for any given case?
According to the official documentation
All of plotting functions expect np.array or np.ma.masked_array as input. Classes that are 'array-like' such as pandas data objects and np.matrix may or may not work as intended. It is best to convert these to np.array objects prior to plotting.
So I changed your code slightly (basically converted the pd df into numpy array).
df = pd.read_csv(file,names=["time", "temp"])
df["time"]=pd.to_datetime(df["time"])
x_axis = np.array(df.time.values)
y_axis = np.array(df.temp.values)
fig, ax = plt.subplots()
ax.plot(x_axis,y_axis)
ax.set(xlabel='Time (Hour:Minutes)', ylabel='Temperature (Celsius)')
ax.xaxis.set_major_locator(mdates.HourLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
plt.show()
The ticks are visible now as below.
Related
I have created a barplot for given days of the year and the number of people born on this given day (figure a). I want to set the x-axes in my seaborn barplot to xlim = (0,365) to show the whole year.
But, once I use ax.set_xlim(0,365) the bar plot is simply moved to the left (figure b).
This is the code:
#data
df = pd.DataFrame()
df['day'] = np.arange(41,200)
df['born'] = np.random.randn(159)*100
#plot
f, axes = plt.subplots(4, 4, figsize = (12,12))
ax = sns.barplot(df.day, df.born, data = df, hue = df.time, ax = axes[0,0], color = 'skyblue')
ax.get_xaxis().set_label_text('')
ax.set_xticklabels('')
ax.set_yscale('log')
ax.set_ylim(0,10e3)
ax.set_xlim(0,366)
ax.set_title('SE Africa')
How can I set the x-axes limits to day 0 and 365 without the bars being shifted to the left?
IIUC, the expected output given the nature of data is difficult to obtain straightforwardly, because, as per the documentation of seaborn.barplot:
This function always treats one of the variables as categorical and draws data at ordinal positions (0, 1, … n) on the relevant axis, even when the data has a numeric or date type.
This means the function seaborn.barplot creates categories based on the data in x (here, df.day) and they are linked to integers, starting from 0.
Therefore, it means even if we have data from day 41 onwards, seaborn is going to refer the starting category with x = 0, making for us difficult to tweak the lower limit of x-axis post function call.
The following code and corresponding plot clarifies what I explained above:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# data
rng = np.random.default_rng(101)
day = np.arange(41,200)
born = rng.integers(low=0, high=10e4, size=200-41)
df = pd.DataFrame({"day":day, "born":born})
# plot
f, ax = plt.subplots(figsize=(4, 4))
sns.barplot(data=df, x='day', y='born', ax=ax, color='b')
ax.set_xlim(0,365)
ax.set_xticks(ticks=np.arange(0, 365, 30), labels=np.arange(0, 365, 30))
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()
I suggest using matplotlib.axes.Axes.bar to overcome this issue, although handling colors of the bars would be not straightforward compared to sns.barplot(..., hue=..., ...) :
# plot
f, ax = plt.subplots(figsize=(4, 4))
ax.bar(x=df.day, height=df.born) # instead of sns.barplot
ax.get_xaxis().set_label_text('')
ax.set_xlim(0,365)
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()
I usually don't ask questions on this platform, but I have a problem that quite bugs me.
Context
I have a function that plots data from a dataframe that has stockdata. It all works perfectly except for the fact that a second, empty window shows next to the actual graph whenever I execute this function. (image)
Here is all the relevant code, I'd be very grateful if some smart people could help me.
def plot(self):
plt.clf()
plt.cla()
colors = Colors()
data = self.getStockData()
if data.empty:
return
data.index = [TimeData.fromTimestamp(x) for x in data.index]
current, previous = data.iloc[-1, 1], data.iloc[0, 1]
percentage = (current / previous - 1) * 100
# Create a table
color = colors.decideColorPct(percentage)
# Create the table
fig = plt.figure(edgecolor=colors.NEUTRAL_COLOR)
fig.patch.set_facecolor(colors.BACKGROUND_COLOR)
plt.plot(data.close, color=color)
plt.title(self.__str2__(), color=colors.NEUTRAL_COLOR)
plt.ylabel("Share price in $", color=colors.NEUTRAL_COLOR)
plt.xlabel("Date", color=colors.NEUTRAL_COLOR)
ax = plt.gca()
ax.xaxis.set_major_formatter(plt_dates.DateFormatter('%Y/%m/%d %H:%M'))
ax.set_xticks([data.index[0], data.index[-1]])
ax.set_facecolor(colors.BACKGROUND_COLOR)
ax.tick_params(color=colors.NEUTRAL_COLOR, labelcolor=colors.NEUTRAL_COLOR)
for spine in ax.spines.values():
spine.set_edgecolor(colors.NEUTRAL_COLOR)
ax.yaxis.grid(True, color=colors.NEUTRAL_COLOR, linestyle=(0, (5, 10)), linewidth=.5)
plt.show()
Some notes:
Matplotlib never gets used in the program before this.
The data is standardized and consists of the following columns: open, low, high, close, volume.
The index of the dataframe exists of timestamps, which gets converted to an index of datetime objects at the following line: data.index = [TimeData.fromTimestamp(x) for x in data.index]
Remove plt.clf() and plt.cla() because it automatically creates window for plot when you don't have this window.
And later fig = plt.figure() creates new window which it uses to display your plot.
Minimal code for test
import matplotlib.pyplot as plt
import pandas as pd
data = pd.DataFrame({'x': [1,2,3], 'y': [2,3,1]})
#plt.clf()
#plt.cla()
fig = plt.figure()
plt.plot(data)
ax = plt.gca()
plt.show()
Hi I'm trying to plot a pointplot and scatterplot on one graph with the same dataset so I can see the individual points that make up the pointplot.
Here is the code I am using:
xlPath = r'path to data here'
df = pd.concat(pd.read_excel(xlPath, sheet_name=None),ignore_index=True)
sns.pointplot(data=df, x='ID', y='HM (N/mm2)', palette='bright', capsize=0.15, alpha=0.5, ci=95, join=True, hue='Layer')
sns.scatterplot(data=df, x='ID', y='HM (N/mm2)')
plt.show()
When I plot, for some reason the points from the scatterplot are offsetting one ID spot right on the x-axis. When I plot the scatter or the point plot separately, they each are in the correct ID spot. Why would plotting them on the same plot cause the scatterplot to offset one right?
Edit: Tried to make the ID column categorical, but that didn't work either.
Seaborn's pointplot creates a categorical x-axis while here the scatterplot uses a numerical x-axis.
Explicitly making the x-values categorical: df['ID'] = pd.Categorical(df['ID']), isn't sufficient, as the scatterplot still sees numbers. Changing the values to strings does the trick. To get them in the correct order, sorting might be necessary.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# first create some test data
df = pd.DataFrame({'ID': np.random.choice(np.arange(1, 49), 500),
'HM (N/mm2)': np.random.uniform(1, 10, 500)})
df['Layer'] = ((df['ID'] - 1) // 6) % 4 + 1
df['HM (N/mm2)'] += df['Layer'] * 8
df['Layer'] = df['Layer'].map(lambda s: f'Layer {s}')
# sort the values and convert the 'ID's to strings
df = df.sort_values('ID')
df['ID'] = df['ID'].astype(str)
fig, ax = plt.subplots(figsize=(12, 4))
sns.pointplot(data=df, x='ID', y='HM (N/mm2)', palette='bright',
capsize=0.15, alpha=0.5, ci=95, join=True, hue='Layer', ax=ax)
sns.scatterplot(data=df, x='ID', y='HM (N/mm2)', color='purple', ax=ax)
ax.margins(x=0.02)
plt.tight_layout()
plt.show()
Updated question and code!
Probably, the tips dataset is not the best example to use, however my issue is reproduced in it, i.e. we see that both point and bar plots share the same Y
I need to combine line and bar plots on one chart. To do this I used seaborn and the following code:
tips = sns.load_dataset('tips')
g = sns.FacetGrid(tips, hue='sex', col='sex', size=4, aspect=2.1, sharey=False, sharex=False)
g = g.map(sns.pointplot, 'day', 'tip', ci=0)
g = g.map(sns.barplot, 'day', 'total_bill', ci=0)
g.set_xticklabels(rotation=45, fontsize=9)
g.set_xticklabels(rotation=45, fontsize=9)
plt.show()
Here is the result:
Everything is okay except the fact that one Y axis is used for both bars and lines on each facetgrid object. I am new to seaborn and currently cannot find a solution. Tried to add "sharey=False" to this line of code
> `g.map(sns.pointplot, 'date', 'worthusdcount')`
however it didn't help.
Any solutions on how to add second Y axis would be appreciated
Here's an example where you apply a custom mapping function to the dataframe of interest. Within the function, you can call plt.gca() to get the current axis at the facet being currently plotted in FacetGrid. Once you have the axis, twinx() can be called just like you would in plain old matplotlib plotting.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
def facetgrid_two_axes(*args, **kwargs):
data = kwargs.pop('data')
dual_axis = kwargs.pop('dual_axis')
alpha = kwargs.pop('alpha', 0.2)
kwargs.pop('color')
ax = plt.gca()
if dual_axis:
ax2 = ax.twinx()
ax2.set_ylabel('Second Axis!')
ax.plot(data['x'],data['y1'], **kwargs, color='red',alpha=alpha)
if dual_axis:
ax2.bar(df['x'],df['y2'], **kwargs, color='blue',alpha=alpha)
df = pd.DataFrame()
df['x'] = np.arange(1,5,1)
df['y1'] = 1 / df['x']
df['y2'] = df['x'] * 100
df['facet'] = 'foo'
df2 = df.copy()
df2['facet'] = 'bar'
df3 = pd.concat([df,df2])
win_plot = sns.FacetGrid(df3, col='facet', size=6)
(win_plot.map_dataframe(facetgrid_two_axes, dual_axis=True)
.set_axis_labels("X", "First Y-axis"))
plt.show()
This isn't the prettiest plot as you might want to adjust the presence of the second y-axis' label, the spacing between plots, etc. but the code suffices to show how to plot two series of differing magnitudes within FacetGrids.
I have a dataframe which has a number of values per date (datetime field). This values are classified in U (users) and S (session) by using a column Group. Seaborn is used to visualize two boxplots per date, where the hue is set to Group.
The problem comes when considering that the values corresponding to U (users) are much bigger than those corresponding to S (session), making the S data illegible. Thus, I need to come up with a solution that allows me to plot both series (U and S) in the same figure in an understandable manner.
I wonder if independent Y axes (with different scales) can be set to each hue, so that both Y axes are shown (as when using twinx but without losing hue visualization capabilities).
Any other alternative would be welcome =)
The S boxplot time series boxplot:
The combined boxplot time series using hue. Obviously it's not possible to see any information about the S group because of the scale of the Y axis:
The columns of the dataframe:
| Day (datetime) | n_data (numeric) | Group (S or U)|
The code line generating the combined boxplot:
seaborn.boxplot(ax=ax,x='Day', y='n_data', hue='Group', data=df,
palette='PRGn', showfliers=False)
Managed to find a solution by using twinx:
fig,ax= plt.subplots(figsize=(50,10))
tmpU = groups.copy()
tmpU.loc[tmp['Group']!='U','n_data'] = np.nan
tmpS = grupos.copy()
tmpS.loc[tmp['Group']!='S','n_data'] = np.nan
ax=seaborn.boxplot(ax=ax,x='Day', y = 'n_data', hue='Group', data=tmpU, palette = 'PRGn', showfliers=False)
ax2 = ax.twinx()
seaborn.boxplot(ax=ax2,x='Day', y = 'n_data', hue='Group', data=tmpS, palette = 'PRGn', showfliers=False)
handles,labels = ax.get_legend_handles_labels()
l= plt.legend(handles[0:2],labels[0:2],loc=1)
plt.setp(ax.get_xticklabels(),rotation=30,horizontalalignment='right')
for label in ax.get_xticklabels()[::2]:
label.set_visible(False)
plt.show()
plt.close('all')
The code above generates the following figure:
Which in this case turns out to be too dense to be published. Therefore I would adopt a visualization based in subplots, as Parfait susgested in his/her answer.
It wasn't an obvious solution to me so I would like to thank Parfait for his/her answer.
Consider building separate plots on same figure with y-axes ranges tailored to subsetted data. Below demonstrates with random data seeded for reproducibility (for readers of this post).
Data (with U values higher than S values)
import pandas as pd
import numpy as np
import seaborn
import matplotlib.pyplot as plt
np.random.seed(2018)
u_df = pd.DataFrame({'Day': pd.date_range('2016-10-01', periods=10)\
.append(pd.date_range('2016-10-01', periods=10)),
'n_data': np.random.uniform(0,800,20),
'Group': 'U'})
s_df = pd.DataFrame({'Day': pd.date_range('2016-10-01', periods=10)\
.append(pd.date_range('2016-10-01', periods=10)),
'n_data': np.random.uniform(0,200,20),
'Group': 'S'})
df = pd.concat([u_df, s_df], ignore_index=True)
df['Day'] = df['Day'].astype('str')
Plot
fig = plt.figure(figsize=(10,5))
for i,g in enumerate(df.groupby('Group')):
plt.title('N_data of {}'.format(g[0]))
plt.subplot(2, 1, i+1)
seaborn.boxplot(x="Day", y="n_data", data=g[1], palette="PRGn", showfliers=False)
plt.tight_layout()
plt.show()
plt.clf()
plt.close('all')
To retain original hue and grouping, render all non-group n_data to np.nan:
fig = plt.figure(figsize=(10,5))
for i,g in enumerate(df.Group.unique()):
plt.subplot(2, 1, i+1)
tmp = df.copy()
tmp.loc[tmp['Group']!=g, 'n_data'] = np.nan
seaborn.boxplot(x="Day", y="n_data", hue="Group", data=tmp,
palette="PRGn", showfliers=False)
plt.tight_layout()
plt.show()
plt.clf()
plt.close('all')
So one option to do a grouped box plot with two separate axis is to use hue_order= ['value, np.nan] in your argument for sns.boxplot:
fig = plt.figure(figsize=(14,8))
ax = sns.boxplot(x="lon_bucketed", y="value", data=m, hue='name', hue_order=['co2',np.nan],
width=0.75,showmeans=True,meanprops={"marker":"s","markerfacecolor":"black", "markeredgecolor":"black"},linewidth=0.5 ,palette = customPalette)
ax2 = ax.twinx()
ax2 = sns.boxplot(ax=ax2,x="lon_bucketed", y="value", data=m, hue='name', hue_order=[np.nan,'g_xco2'],
width=0.75,showmeans=True,meanprops={"marker":"s","markerfacecolor":"black", "markeredgecolor":"black"},linewidth=0.5, palette = customPalette)
ax1.grid(alpha=0.5, which = 'major')
plt.tight_layout()
ax.legend_.remove()
GW = mpatches.Patch(color='seagreen', label='$CO_2$')
WW = mpatches.Patch(color='mediumaquamarine', label='$XCO_2$')
ax, ax2.legend(handles=[GW,WW], loc='upper right',prop={'size': 14}, fontsize=12)
ax.set_title("$XCO_2$ vs. $CO_2$",fontsize=18)
ax.set_xlabel('Longitude [\u00b0]',fontsize=14)
ax.set_ylabel('$CO_2$ [ppm]',fontsize=14)
ax2.set_ylabel('$XCO_2$ [ppm]',fontsize=14)
ax.tick_params(labelsize=14)