I'm working with a dictionary of values which have a string (date) and float for time in milliseconds. I want to present the data in a bar graph and also with a table below. I have the bar graph working but the table gets messed up. I want the dates as columns and time as a single row.
The dictionary is something like:
time_and_dates_for_plot = {'04-26': 488.1063166666667, '04-27': 289.7289333333333, '04-28': 597.2343999999999, '04-29': 0, '04-30': 0, '05-01': 1061.958075}
plot.bar(range(len(time_and_dates_for_plot)), time_and_dates_for_plot.values(), align='center')
plot.xticks(range(len(time_and_dates_for_plot)), list(time_and_dates_for_plot.keys()))
plot.xlabel('Date (s)')
plot.ylabel('milliseconds')
plot.grid(True)
plot.gca().set_position((.1, .3, .8, .6))
col_labels = list(time_and_dates_for_plot.keys())
print(col_labels)
row_labels = ['ms']
cell_text = []
val = []
for key in time_and_dates_for_plot.keys():
val.append((time_and_dates_for_plot.get(key)))
cell_text.append(val)
val = []
print(cell_text)
plot.table(cellText=cell_text, colLabels=col_labels)
plot.show()
As you can see from the picture, I get all entries under one column where as I want something like one cell data under one coloumn (just tabulate plot data).
Also, how do I add some padding between the table and graph?
First time I'm using matplotlib and pretty sure I'm missing something. Any help is really appreciated.
In the table function you need an extra pair of brackets []. ...cellText=[cell_text]...
Also, you can use subplots to have a better arrangement of the plots. Here, my solution uses subplots of 2 rows withheight_ratiosof 8 to 1, and ahspace` pf 0.3
import matplotlib as mpl
import matplotlib.pyplot as plt
time_and_dates_for_plot = {'04-26': 488.1063166666667,
'04-27': 289.7289333333333,
'04-28': 597.2343999999999,
'04-29': 0,
'04-30': 0,
'05-01': 1061.958075}
fig,axs = plt.subplots(figsize=(8,5),ncols=1,nrows=2,
gridspec_kw={'height_ratios':[8,1],'hspace':0.3})
ax = axs[0]
ax.bar(range(len(time_and_dates_for_plot)),
time_and_dates_for_plot.values(), align='center')
ax.set_xticks(range(len(time_and_dates_for_plot)),
list(time_and_dates_for_plot.keys()))
ax.set_xlabel('Date (s)')
ax.set_ylabel('milliseconds')
ax.grid(True)
col_labels = list(time_and_dates_for_plot.keys())
row_labels = ['ms']
cell_text = []
for key in time_and_dates_for_plot.keys():
cell_text += [time_and_dates_for_plot[key]]
ax = axs[1]
ax.set_frame_on(False) # turn off frame for the table subplot
ax.set_xticks([]) # turn off x ticks for the table subplot
ax.set_yticks([]) # turn off y ticks for the table subplot
ax.table(cellText=[cell_text], colLabels=col_labels, loc='upper center')
plt.show()
The output looks like:
** UPDATE **
Using only one subplot, no xticklabels, sorted dates, nicer numbers with %g, and larger table cells using bbox :
import matplotlib as mpl
import matplotlib.pyplot as plt
time_and_dates_for_plot = {'04-26': 488.1063166666667,
'04-27': 289.7289333333333,
'04-28': 597.2343999999999,
'04-29': 0,
'04-30': 0,
'05-01': 1061.958075}
N = len(time_and_dates_for_plot)
colLabels = sorted(time_and_dates_for_plot.keys())
fig,ax = plt.subplots()
aa = ax.bar(range(N),[time_and_dates_for_plot[x] for x in colLabels],
align='center')
ax.set_xlabel('Date')
ax.set_ylabel('milliseconds')
ax.set_xticklabels([]) # turn off x ticks
ax.grid(True)
fig.subplots_adjust(bottom=0.25) # making some room for the table
cell_text = []
for key in colLabels:
cell_text += ["%g"%time_and_dates_for_plot[key]]
ax.table(cellText=[cell_text], colLabels=colLabels,
rowLabels=['ms'],cellLoc='center',
bbox=[0, -0.27, 1, 0.15])
ax.set_xlim(-0.5,N-0.5) # Helps having bars aligned with table columns
ax.set_title("milliseconds vs Date")
fig.savefig("Bar_graph.png")
plt.show()
Output:
** Update: Making room for the table using subplots_adjust **
Related
I usually don't ask questions on this platform, but I have a problem that quite bugs me.
Context
I have a function that plots data from a dataframe that has stockdata. It all works perfectly except for the fact that a second, empty window shows next to the actual graph whenever I execute this function. (image)
Here is all the relevant code, I'd be very grateful if some smart people could help me.
def plot(self):
plt.clf()
plt.cla()
colors = Colors()
data = self.getStockData()
if data.empty:
return
data.index = [TimeData.fromTimestamp(x) for x in data.index]
current, previous = data.iloc[-1, 1], data.iloc[0, 1]
percentage = (current / previous - 1) * 100
# Create a table
color = colors.decideColorPct(percentage)
# Create the table
fig = plt.figure(edgecolor=colors.NEUTRAL_COLOR)
fig.patch.set_facecolor(colors.BACKGROUND_COLOR)
plt.plot(data.close, color=color)
plt.title(self.__str2__(), color=colors.NEUTRAL_COLOR)
plt.ylabel("Share price in $", color=colors.NEUTRAL_COLOR)
plt.xlabel("Date", color=colors.NEUTRAL_COLOR)
ax = plt.gca()
ax.xaxis.set_major_formatter(plt_dates.DateFormatter('%Y/%m/%d %H:%M'))
ax.set_xticks([data.index[0], data.index[-1]])
ax.set_facecolor(colors.BACKGROUND_COLOR)
ax.tick_params(color=colors.NEUTRAL_COLOR, labelcolor=colors.NEUTRAL_COLOR)
for spine in ax.spines.values():
spine.set_edgecolor(colors.NEUTRAL_COLOR)
ax.yaxis.grid(True, color=colors.NEUTRAL_COLOR, linestyle=(0, (5, 10)), linewidth=.5)
plt.show()
Some notes:
Matplotlib never gets used in the program before this.
The data is standardized and consists of the following columns: open, low, high, close, volume.
The index of the dataframe exists of timestamps, which gets converted to an index of datetime objects at the following line: data.index = [TimeData.fromTimestamp(x) for x in data.index]
Remove plt.clf() and plt.cla() because it automatically creates window for plot when you don't have this window.
And later fig = plt.figure() creates new window which it uses to display your plot.
Minimal code for test
import matplotlib.pyplot as plt
import pandas as pd
data = pd.DataFrame({'x': [1,2,3], 'y': [2,3,1]})
#plt.clf()
#plt.cla()
fig = plt.figure()
plt.plot(data)
ax = plt.gca()
plt.show()
Hi I'm trying to plot a pointplot and scatterplot on one graph with the same dataset so I can see the individual points that make up the pointplot.
Here is the code I am using:
xlPath = r'path to data here'
df = pd.concat(pd.read_excel(xlPath, sheet_name=None),ignore_index=True)
sns.pointplot(data=df, x='ID', y='HM (N/mm2)', palette='bright', capsize=0.15, alpha=0.5, ci=95, join=True, hue='Layer')
sns.scatterplot(data=df, x='ID', y='HM (N/mm2)')
plt.show()
When I plot, for some reason the points from the scatterplot are offsetting one ID spot right on the x-axis. When I plot the scatter or the point plot separately, they each are in the correct ID spot. Why would plotting them on the same plot cause the scatterplot to offset one right?
Edit: Tried to make the ID column categorical, but that didn't work either.
Seaborn's pointplot creates a categorical x-axis while here the scatterplot uses a numerical x-axis.
Explicitly making the x-values categorical: df['ID'] = pd.Categorical(df['ID']), isn't sufficient, as the scatterplot still sees numbers. Changing the values to strings does the trick. To get them in the correct order, sorting might be necessary.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# first create some test data
df = pd.DataFrame({'ID': np.random.choice(np.arange(1, 49), 500),
'HM (N/mm2)': np.random.uniform(1, 10, 500)})
df['Layer'] = ((df['ID'] - 1) // 6) % 4 + 1
df['HM (N/mm2)'] += df['Layer'] * 8
df['Layer'] = df['Layer'].map(lambda s: f'Layer {s}')
# sort the values and convert the 'ID's to strings
df = df.sort_values('ID')
df['ID'] = df['ID'].astype(str)
fig, ax = plt.subplots(figsize=(12, 4))
sns.pointplot(data=df, x='ID', y='HM (N/mm2)', palette='bright',
capsize=0.15, alpha=0.5, ci=95, join=True, hue='Layer', ax=ax)
sns.scatterplot(data=df, x='ID', y='HM (N/mm2)', color='purple', ax=ax)
ax.margins(x=0.02)
plt.tight_layout()
plt.show()
Currently, I'm working on an introductory paper on data manipulation and such; however... the CSV I'm working on has some things I wish to do a scatter graph on!
I want a scatter graph to show me the volume sold on certain items as well as their average price, differentiating all data according to their region (Through colours I assume).
So what I want is to know if I can add the region column as a quantitative value
or if there's a way to make this possible...
It's my first time using Python and I'm confused way too often
I'm not sure if this is what you mean, but here is some working code, assuming you have data in the format of [(country, volume, price), ...]. If not, you can change the inputs to the scatter method as needed.
import random
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
n_countries = 50
# get the data into "countries", for example
countries = ...
# in this example: countries is [('BS', 21, 25), ('WZ', 98, 25), ...]
df = pd.DataFrame(countries)
# arbitrary method to get a color
def get_color(i, max_i):
cmap = matplotlib.cm.get_cmap('Spectral')
return cmap(i/max_i)
# get the figure and axis - make a larger figure to fit more points
# add labels for metric names
def get_fig_ax():
fig = plt.figure(figsize=(14,14))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('volume')
ax.set_ylabel('price')
return fig, ax
# switch around the assignments depending on your data
def get_x_y_labels():
x = df[1]
y = df[2]
labels = df[0]
return x, y, labels
offset = 1 # offset just so annotations aren't on top of points
x, y, labels = get_x_y_labels()
fig, ax = get_fig_ax()
# add a point and annotation for each of the labels/regions
for i, region in enumerate(labels):
ax.annotate(region, (x[i] + offset, y[i] + offset))
# note that you must use "label" for "legend" to work
ax.scatter(x[i], y[i], color=get_color(i, len(x)), label=region)
# Add the legend just outside of the plot.
# The .1, 0 at the end will put it outside
ax.legend(loc='upper right', bbox_to_anchor=(1, 1, .1, 0))
plt.show()
I need to create a chart, that has a grid like in the following picture.
The key factors being:
The x-axis is time with each tick marking 30 seconds
y-axes labels in the chart repeat at a variable interval
Chart must grow with the amount of data (i.e. for 30 minutes of data, it should be 60 boxes wide)
I have been looking into matplotlib for a bit, and it seems promising. I also managed to fill the chart with data. See my result for 40 Minutes of data.
But before I invest more time into research, I must know if this goal is even possible. If not I'll have to look into other charts. Thanks for your help!
Here is the source for the above image (my_data is actually read from a csv, but filled with random junk here):
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(50, 200), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
plt.ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2) # second plot
plt.ylim(0, 100)
actg.xaxis.set_minor_locator(dates.MinuteLocator())
actg.xaxis.set_major_formatter(hfmt)
atoco.xaxis.set_minor_locator(dates.MinuteLocator())
atoco.xaxis.set_major_formatter(hfmt)
plt.xticks(rotation=45)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in fig.axes:
ax.grid(True)
plt.tight_layout()
plt.show()
OK, here's something close to what you are after, I think.
I've used dates.SecondLocator(bysecond=[0,30]) to set the grid every 30 seconds (also need to make sure the grid is set on the minor ticks, with ax.xaxis.grid(True,which='both')
To repeat the yticklabels, I create a twinx of the axes for every major tick on the xaxis, and move the spine to that tick's location. I then set the spine color to none, so it doesn't show up, and turn of the actual ticks, but not the tick labels.
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
# how often to show xticklabels and repeat yticklabels:
xtickinterval = 10
# Make random data
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(120, 160), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
actg.set_ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2,sharex=actg) # second plot, share the xaxis with actg
atoco.set_ylim(-5, 105)
# Set the major ticks to the intervals specified above.
actg.xaxis.set_major_locator(dates.MinuteLocator(byminute=np.arange(0,60,xtickinterval)))
# Set the minor ticks to every 30 seconds
minloc = dates.SecondLocator(bysecond=[0,30])
minloc.MAXTICKS = 3000
actg.xaxis.set_minor_locator(minloc)
# Use the formatter specified above
actg.xaxis.set_major_formatter(hfmt)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
print times[-1]-times[0]
# Make your plot
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in [actg,atoco]:
# Turn off the yticklabels on the right hand side
ax.set_yticklabels([])
# Set the grids
ax.xaxis.grid(True,which='both',color='r')
ax.yaxis.grid(True,which='major',color='r')
# Create new yticklabels every major tick on the xaxis
for tick in ax.get_xticks():
tx = ax.twinx()
tx.set_ylim(ax.get_ylim())
tx.spines['right'].set_position(('data',tick))
tx.spines['right'].set_color('None')
for tic in tx.yaxis.get_major_ticks():
tic.tick1On = tic.tick2On = False
plt.tight_layout()
plt.show()
I want to create a bar chart of two series (say 'A' and 'B') contained in a Pandas dataframe. If I wanted to just plot them using a different y-axis, I can use secondary_y:
df = pd.DataFrame(np.random.uniform(size=10).reshape(5,2),columns=['A','B'])
df['A'] = df['A'] * 100
df.plot(secondary_y=['A'])
but if I want to create bar graphs, the equivalent command is ignored (it doesn't put different scales on the y-axis), so the bars from 'A' are so big that the bars from 'B' are cannot be distinguished:
df.plot(kind='bar',secondary_y=['A'])
How can I do this in pandas directly? or how would you create such graph?
I'm using pandas 0.10.1 and matplotlib version 1.2.1.
Don't think pandas graphing supports this. Did some manual matplotlib code.. you can tweak it further
import pylab as pl
fig = pl.figure()
ax1 = pl.subplot(111,ylabel='A')
#ax2 = gcf().add_axes(ax1.get_position(), sharex=ax1, frameon=False, ylabel='axes2')
ax2 =ax1.twinx()
ax2.set_ylabel('B')
ax1.bar(df.index,df.A.values, width =0.4, color ='g', align = 'center')
ax2.bar(df.index,df.B.values, width = 0.4, color='r', align = 'edge')
ax1.legend(['A'], loc = 'upper left')
ax2.legend(['B'], loc = 'upper right')
fig.show()
I am sure there are ways to force the one bar further tweak it. move bars further apart, one slightly transparent etc.
Ok, I had the same problem recently and even if it's an old question, I think that I can give an answer for this problem, in case if someone else lost his mind with this. Joop gave the bases of the thing to do, and it's easy when you only have (for exemple) two columns in your dataframe, but it becomes really nasty when you have a different numbers of columns for the two axis, due to the fact that you need to play with the position argument of the pandas plot() function. In my exemple I use seaborn but it's optionnal :
import pandas as pd
import seaborn as sns
import pylab as plt
import numpy as np
df1 = pd.DataFrame(np.array([[i*99 for i in range(11)]]).transpose(), columns = ["100"], index = [i for i in range(11)])
df2 = pd.DataFrame(np.array([[i for i in range(11)], [i*2 for i in range(11)]]).transpose(), columns = ["1", "2"], index = [i for i in range(11)])
fig, ax = plt.subplots()
ax2 = ax.twinx()
# we must define the length of each column.
df1_len = len(df1.columns.values)
df2_len = len(df2.columns.values)
column_width = 0.8 / (df1_len + df2_len)
# we calculate the position of each column in the plot. This value is based on the position definition :
# Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center)
# http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.plot.html
df1_posi = 0.5 + (df2_len/float(df1_len)) * 0.5
df2_posi = 0.5 - (df1_len/float(df2_len)) * 0.5
# In order to have nice color, I use the default color palette of seaborn
df1.plot(kind='bar', ax=ax, width=column_width*df1_len, color=sns.color_palette()[:df1_len], position=df1_posi)
df2.plot(kind='bar', ax=ax2, width=column_width*df2_len, color=sns.color_palette()[df1_len:df1_len+df2_len], position=df2_posi)
ax.legend(loc="upper left")
# Pandas add line at x = 0 for each dataframe.
ax.lines[0].set_visible(False)
ax2.lines[0].set_visible(False)
# Specific to seaborn, we have to remove the background line
ax2.grid(b=False, axis='both')
# We need to add some space, the xlim don't manage the new positions
column_length = (ax2.get_xlim()[1] - abs(ax2.get_xlim()[0])) / float(len(df1.index))
ax2.set_xlim([ax2.get_xlim()[0] - column_length, ax2.get_xlim()[1] + column_length])
fig.patch.set_facecolor('white')
plt.show()
And the result : http://i.stack.imgur.com/LZjK8.png
I didn't test every possibilities but it looks like it works fine whatever the number of columns in each dataframe you use.