x and y-axis variables in the stem plots are the columns from the DataFrames
import pandas as pd
import matplotlib.pyplot as plt
# Three stacked panels (3 rows x 1 column) sharing one figure title.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(8, 20))
fig.suptitle('Magnitude-Time Plot : R1')
fig.subplots_adjust(hspace=0.5)

# Custom axis ranges, applied identically to every panel.
xlim = (1990, 2005)
ylim = (0, 9)
for panel in ax:
    panel.set(xlim=xlim, ylim=ylim)

# All stems are drawn without markers (markerfmt=' ') and with thin lines.

# Panel 1 of 3: Catalog8 in R1 (blue stems)
markerline, stemlines, baseline = ax[0].stem(
    cata8q1.YearDeci, cata8q1.Magnitude,
    linefmt='b', markerfmt=' ', use_line_collection=True)
stemlines.set_linewidth(0.5)
ax[0].set_title('Catalog OLD')

# Panel 2 of 3: catalog 9 unique events in R1 (red stems)
markerline, stemlines, baseline = ax[1].stem(
    cata9uniq1.YearDeci, cata9uniq1.Magnitude,
    linefmt='r', markerfmt=' ', use_line_collection=True)
stemlines.set_linewidth(0.5)
ax[1].set_title('Unique events in NEW CATALOG')

# Panel 3 of 3: Catalog NEW in R1 (default stem colour)
markerline, stemlines, baseline = ax[2].stem(
    catanewq1.YearDeci, catanewq1.Magnitude,
    markerfmt=' ', use_line_collection=True)
stemlines.set_linewidth(0.5)
ax[2].set_title('OLD + unique events in NEW Catalog')

plt.show()
I am trying to plot the [3,1] stem subplots. I want to control the axes properties and figure properties more conveniently, with fewer lines of code.
You can do something like this.
# One (DataFrame, panel title, stem line format) entry per subplot row.
# 'C0-' is Matplotlib's default stem line format, used for the panel that
# did not specify a colour.
catalogs = [
    (cata8q1, "Catalog Old", "b"),
    (cata9uniq1, "Unique events in NEW CATALOG", "r"),
    (catanewq1, "OLD + unique events in NEW Catalog", "C0-"),
]

for i, (catalog, title, linefmt) in enumerate(catalogs):
    # Draw on ax[i] (not ax[0]) so each catalog lands in its own panel.
    # NOTE: use_line_collection is deprecated in recent Matplotlib releases;
    # drop the argument if your version rejects it.
    markerline, stemlines, baseline = ax[i].stem(
        catalog.YearDeci, catalog.Magnitude,
        markerfmt=' ', use_line_collection=True, linefmt=linefmt)
    plt.setp(stemlines, linewidth=0.5)
    ax[i].set_title(title)
Related
I have created a barplot for given days of the year and the number of people born on this given day (figure a). I want to set the x-axes in my seaborn barplot to xlim = (0,365) to show the whole year.
But, once I use ax.set_xlim(0,365) the bar plot is simply moved to the left (figure b).
This is the code:
#data
# Synthetic input: days 41..199 (159 rows) in 'day' and a random value per day in 'born'.
df = pd.DataFrame()
df['day'] = np.arange(41,200)
df['born'] = np.random.randn(159)*100
#plot
# 4x4 grid of panels; only the top-left one (axes[0,0]) is drawn into below.
f, axes = plt.subplots(4, 4, figsize = (12,12))
# NOTE(review): hue reads df.time, but only 'day' and 'born' are assigned above — confirm where 'time' comes from.
ax = sns.barplot(df.day, df.born, data = df, hue = df.time, ax = axes[0,0], color = 'skyblue')
# Blank out the x-axis label and the x tick labels.
ax.get_xaxis().set_label_text('')
ax.set_xticklabels('')
ax.set_yscale('log')
ax.set_ylim(0,10e3)
# Widening the x-range here is the step after which the bars appear shifted to the left.
ax.set_xlim(0,366)
ax.set_title('SE Africa')
How can I set the x-axes limits to day 0 and 365 without the bars being shifted to the left?
IIUC, the expected output given the nature of data is difficult to obtain straightforwardly, because, as per the documentation of seaborn.barplot:
This function always treats one of the variables as categorical and draws data at ordinal positions (0, 1, … n) on the relevant axis, even when the data has a numeric or date type.
This means the function seaborn.barplot creates categories based on the data in x (here, df.day) and they are linked to integers, starting from 0.
Therefore, even though our data starts at day 41, seaborn places the first category at x = 0, which makes it difficult to adjust the lower limit of the x-axis after the function call.
The following code and corresponding plot clarifies what I explained above:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# data: days 41..199 with a reproducible (seeded) random count per day
rng = np.random.default_rng(101)
day = np.arange(41, 200)
born = rng.integers(low=0, high=10e4, size=day.size)
df = pd.DataFrame({"day": day, "born": born})

# plot: seaborn treats 'day' as categorical, so the bars sit at positions 0..n-1
f, ax = plt.subplots(figsize=(4, 4))
sns.barplot(data=df, x='day', y='born', ax=ax, color='b')

# Stretch the x-axis over the whole year and label a tick every 30 days.
tick_days = np.arange(0, 365, 30)
ax.set_xlim(0, 365)
ax.set_xticks(ticks=tick_days, labels=tick_days)

ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()
I suggest using matplotlib.axes.Axes.bar to overcome this issue, although handling colors of the bars would be not straightforward compared to sns.barplot(..., hue=..., ...) :
# plot: Axes.bar places each bar at its numeric x value (unlike sns.barplot)
f, ax = plt.subplots(figsize=(4, 4))
ax.bar(df.day, df.born)
ax.xaxis.set_label_text('')
ax.set(xlim=(0, 365), yscale='log', title='SE Africa')
plt.tight_layout()
plt.show()
I have the columns of a dataframe plotted as separate histogram subplots. For each subplot, I want the bars coloured according to the value in a separate list. I have managed this by making a cmap of it and manually cycling those colours, however, is there a way to add a colorbar to the side to show what values these colours belong to? This is what I have right now:
import pandas as pd
import matplotlib as mpl
from matplotlib.colors import rgb2hex
# Load the data: tab-separated file, first row is the header, columns 1..12.
df = pd.read_csv("shortlist_temp.dat", sep='\t', header=(0), usecols=(range(1, 13)))

# One value per column, used to pick that column's colour.
orig_star_teff = [4308.0, 5112.0, 4240.0, 4042.0, 4411.0, 4100.0,
                  4511.0, 4738.0, 4630.0, 4870.0, 4442.0, 4845.0]

# Manual normalisation: shift down by 4000, then divide by the largest
# shifted value so everything falls into the colormap's 0..1 range.
orig_star_teff_norm = [i - 4000 for i in orig_star_teff]
largest_shifted = max(orig_star_teff_norm)
orig_star_teff_norm = [float(i) / largest_shifted for i in orig_star_teff_norm]

# Turn the normalised values into hex colours and install them as the
# global colour cycle, so successive histograms pick them up in order.
cmap = mpl.cm.plasma
color_list = cmap(orig_star_teff_norm)
color_list2 = [rgb2hex(rgba) for rgba in color_list]
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=color_list2)

ax = df.plot.hist(subplots=True, bins=12, legend=False, layout=(3, 4),
                  figsize=(15, 10), sharey=True)

# Titles in row-major order, matching the 3x4 grid of axes.
panel_titles = (
    'ABOO', 'EpsVIR', 'HIP 96014', '2M16113361',
    'KIC 3955590', 'KIC 5113061', 'KIC 5859492', 'KIC 6547007',
    'KIC 11444313', 'KIC 11657684', 'HD102328-K3III', 'HD142091-K0III',
)
for panel, panel_title in zip(ax.flat, panel_titles):
    panel.set_title(panel_title)
Resulting plot
Instead of doing all the normalization steps manually, it probably is easier to create a norm. In this case a norm that maps the values from 4000 till max to the range 0,1 needed for the colormap. Note that converting to hex isn't necessary.
With the norm and the colormap, a ScalarMappable can be created with all the necessary information for a colorbar:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable

# reading in the data
# df = pd.read_csv("shortlist_temp.dat", sep='\t', header=(0), usecols=(range(1, 13)))
# generating some dummy data (100 rows, 12 columns) in place of the file
df = pd.DataFrame(np.random.randn(100, 12))

# separate list of values, one per column, that decides each histogram's colour
orig_star_teff = [4308.0, 5112.0, 4240.0, 4042.0, 4411.0, 4100.0,
                  4511.0, 4738.0, 4630.0, 4870.0, 4442.0, 4845.0]

# The norm maps 4000..max(orig_star_teff) onto 0..1, replacing the manual
# shift-and-divide; the same norm later drives the colorbar.
norm = plt.Normalize(4000, max(orig_star_teff))
cmap = mpl.cm.plasma
color_list = cmap(norm(orig_star_teff))

# Install the colours as the global cycle so each subplot takes the next one.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=color_list)
axs = df.plot.hist(subplots=True, bins=12, legend=False, layout=(3, 4),
                   figsize=(15, 10), sharey=True)

titles = ['ABOO', 'EpsVIR', 'HIP 96014', '2M16113361',
          'KIC 3955590', 'KIC 5113061', 'KIC 5859492', 'KIC 6547007',
          'KIC 11444313', 'KIC 11657684', 'HD102328-K3III', 'HD142091-K0III']
for ax, title in zip(axs.flat, titles):
    ax.set_title(title)

# Attach the colorbar to the last column of axes so it sits at the right edge.
plt.colorbar(ScalarMappable(cmap=cmap, norm=norm), ax=axs[:, -1])
plt.show()
I'm working with a dictionary of values which have a string (date) and float for time in milliseconds. I want to present the data in a bar graph and also with a table below. I have the bar graph working but the table gets messed up. I want the dates as columns and time as a single row.
The dictionary is something like:
# Mapping of date label ('MM-DD') to a time value; the y-axis below labels it as milliseconds.
time_and_dates_for_plot = {'04-26': 488.1063166666667, '04-27': 289.7289333333333, '04-28': 597.2343999999999, '04-29': 0, '04-30': 0, '05-01': 1061.958075}
# One bar per dictionary entry, positioned at 0..n-1 and labelled with the date keys.
# NOTE(review): `plot` is not imported in this snippet; presumably an alias for matplotlib.pyplot — confirm.
plot.bar(range(len(time_and_dates_for_plot)), time_and_dates_for_plot.values(), align='center')
plot.xticks(range(len(time_and_dates_for_plot)), list(time_and_dates_for_plot.keys()))
plot.xlabel('Date (s)')
plot.ylabel('milliseconds')
plot.grid(True)
# Shrink the axes (left, bottom, width, height in figure fractions), leaving space at the bottom of the figure.
plot.gca().set_position((.1, .3, .8, .6))
col_labels = list(time_and_dates_for_plot.keys())
print(col_labels)
# row_labels is assigned but not passed to plot.table below.
row_labels = ['ms']
cell_text = []
val = []
# NOTE(review): the loop body's indentation was lost in this copy; presumably the
# next three statements all belong to the loop, which would make cell_text a list
# of one-element rows — confirm against the original post.
for key in time_and_dates_for_plot.keys():
val.append((time_and_dates_for_plot.get(key)))
cell_text.append(val)
val = []
print(cell_text)
plot.table(cellText=cell_text, colLabels=col_labels)
plot.show()
As you can see from the picture, I get all entries under one column, whereas I want one cell of data under each column (i.e. just tabulate the plotted data).
Also, how do I add some padding between the table and graph?
First time I'm using matplotlib and pretty sure I'm missing something. Any help is really appreciated.
In the table function you need an extra pair of brackets []. ...cellText=[cell_text]...
Also, you can use subplots to have a better arrangement of the plots. Here, my solution uses subplots of 2 rows with `height_ratios` of 8 to 1, and an `hspace` of 0.3.
import matplotlib as mpl
import matplotlib.pyplot as plt
# Date label -> time value for that date.
time_and_dates_for_plot = {
    '04-26': 488.1063166666667,
    '04-27': 289.7289333333333,
    '04-28': 597.2343999999999,
    '04-29': 0,
    '04-30': 0,
    '05-01': 1061.958075,
}

# Two stacked axes: a tall one for the bars and a short one that only hosts
# the table (height ratio 8:1, with vertical spacing between them).
fig, axs = plt.subplots(
    nrows=2, ncols=1, figsize=(8, 5),
    gridspec_kw={'height_ratios': [8, 1], 'hspace': 0.3},
)

# --- bar chart in the upper axes ---
ax = axs[0]
bar_positions = range(len(time_and_dates_for_plot))
ax.bar(bar_positions, time_and_dates_for_plot.values(), align='center')
ax.set_xticks(bar_positions, list(time_and_dates_for_plot.keys()))
ax.set_xlabel('Date (s)')
ax.set_ylabel('milliseconds')
ax.grid(True)

# --- table contents: dates as column headers, values as one flat row ---
col_labels = list(time_and_dates_for_plot.keys())
row_labels = ['ms']
cell_text = list(time_and_dates_for_plot.values())

# --- table in the lower axes, with frame and ticks switched off ---
ax = axs[1]
ax.set(frame_on=False, xticks=[], yticks=[])
# cellText expects a list of rows, hence the extra brackets around cell_text.
ax.table(cellText=[cell_text], colLabels=col_labels, loc='upper center')
plt.show()
The output looks like:
** UPDATE **
Using only one subplot, no xticklabels, sorted dates, nicer numbers with %g, and larger table cells using bbox :
import matplotlib as mpl
import matplotlib.pyplot as plt
# Date label -> time value for that date.
time_and_dates_for_plot = {
    '04-26': 488.1063166666667,
    '04-27': 289.7289333333333,
    '04-28': 597.2343999999999,
    '04-29': 0,
    '04-30': 0,
    '05-01': 1061.958075,
}

N = len(time_and_dates_for_plot)
# Sorted date keys drive both the bar order and the table columns.
colLabels = sorted(time_and_dates_for_plot)
bar_heights = [time_and_dates_for_plot[x] for x in colLabels]

fig, ax = plt.subplots()
aa = ax.bar(range(N), bar_heights, align='center')
ax.set_xlabel('Date')
ax.set_ylabel('milliseconds')
ax.set_xticklabels([])  # hide the x tick labels; the table header shows the dates
ax.grid(True)
fig.subplots_adjust(bottom=0.25)  # making some room for the table

# Format each value compactly with %g for display in the table.
cell_text = ["%g" % time_and_dates_for_plot[key] for key in colLabels]

# bbox is given in axes coordinates; the negative y places the table below the plot.
ax.table(
    cellText=[cell_text],
    colLabels=colLabels,
    rowLabels=['ms'],
    cellLoc='center',
    bbox=[0, -0.27, 1, 0.15],
)
ax.set_xlim(-0.5, N - 0.5)  # Helps having bars aligned with table columns
ax.set_title("milliseconds vs Date")
fig.savefig("Bar_graph.png")
plt.show()
Output:
** Update: Making room for the table using subplots_adjust **
I need to create a chart, that has a grid like in the following picture.
The key factors being:
The x-axis is time with each tick marking 30 seconds
y-axes labels in the chart repeat at a variable interval
Chart must grow with the amount of data (i.e. for 30 minutes of data, it should be 60 boxes wide)
I have been looking into matplotlib for a bit, and it seems promising. I also managed to fill the chart with data. See my result for 40 Minutes of data.
But before I invest more time into research, I must know if this goal is even possible. If not I'll have to look into other charts. Thanks for your help!
Here is the source for the above image (my_data is actually read from a csv, but filled with random junk here):
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
# Random stand-in data: 3000 samples, one per second starting now, each a
# (timestamp, first trace value, second trace value) tuple.
my_data = [
    (
        datetime.fromtimestamp(i + time.time()),
        np.random.randint(50, 200),
        np.random.randint(10, 100),
    )
    for i in range(3000)
]

hfmt = dates.DateFormatter('%H:%M:%S')

fig = plt.figure()
actg = fig.add_subplot(2, 1, 1)   # two rows, one column, first plot
actg.set_ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2)  # second plot
atoco.set_ylim(0, 100)

# Same x-axis setup on both panels: minor ticks every minute, H:M:S labels.
for panel in (actg, atoco):
    panel.xaxis.set_minor_locator(dates.MinuteLocator())
    panel.xaxis.set_major_formatter(hfmt)

# Applies to the current axes, i.e. the most recently added subplot.
plt.xticks(rotation=45)

# Split the sample tuples into one list per column.
times = [sample[0] for sample in my_data]
fhr1 = [sample[1] for sample in my_data]
toco = [sample[2] for sample in my_data]

actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')

for panel in fig.axes:
    panel.grid(True)

plt.tight_layout()
plt.show()
OK, here's something close to what you are after, I think.
I've used dates.SecondLocator(bysecond=[0,30]) to set the grid every 30 seconds (you also need to make sure the grid is set on the minor ticks, with ax.xaxis.grid(True,which='both')).
To repeat the yticklabels, I create a twinx of the axes for every major tick on the xaxis, and move the spine to that tick's location. I then set the spine color to none, so it doesn't show up, and turn off the actual ticks, but not the tick labels.
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
# how often (in minutes) to show xticklabels and repeat yticklabels:
xtickinterval = 10

# Make random data: 3000 samples, one per second starting now, each a
# (timestamp, first trace value, second trace value) tuple.
my_data = [
    (
        datetime.fromtimestamp(i + time.time()),
        np.random.randint(120, 160),
        np.random.randint(10, 100),
    )
    for i in range(3000)
]

hfmt = dates.DateFormatter('%H:%M:%S')

fig = plt.figure()
actg = fig.add_subplot(2, 1, 1)  # two rows, one column, first plot
actg.set_ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2, sharex=actg)  # second plot, share the xaxis with actg
atoco.set_ylim(-5, 105)

# Set the major ticks to the intervals specified above.
actg.xaxis.set_major_locator(
    dates.MinuteLocator(byminute=np.arange(0, 60, xtickinterval)))

# Set the minor ticks to every 30 seconds.
minloc = dates.SecondLocator(bysecond=[0, 30])
# Raise the locator's tick limit so long recordings are not rejected.
minloc.MAXTICKS = 3000
actg.xaxis.set_minor_locator(minloc)

# Use the formatter specified above
actg.xaxis.set_major_formatter(hfmt)

# Split the sample tuples into one list per column.
times = [sample[0] for sample in my_data]
fhr1 = [sample[1] for sample in my_data]
toco = [sample[2] for sample in my_data]

# Total time span covered by the data (print() call form for Python 3).
print(times[-1] - times[0])

# Make your plot
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')

for ax in [actg, atoco]:
    # Hide the axes' own y tick labels; the twin axes below supply the
    # repeated labels instead.
    ax.set_yticklabels([])

    # Set the grids: x grid on both major and minor ticks (every 30 s),
    # y grid on major ticks only.
    ax.xaxis.grid(True, which='both', color='r')
    ax.yaxis.grid(True, which='major', color='r')

    # Create new yticklabels at every major tick on the xaxis
    for tick in ax.get_xticks():
        tx = ax.twinx()
        tx.set_ylim(ax.get_ylim())
        # Move the twin's right spine to this x tick and make it invisible,
        # so only its tick labels remain at that position.
        tx.spines['right'].set_position(('data', tick))
        tx.spines['right'].set_color('None')
        # Hide the tick marks but keep the labels. tick_params replaces the
        # per-tick tick1On/tick2On attributes, which newer Matplotlib
        # versions no longer provide.
        tx.tick_params(axis='y', which='major', left=False, right=False)

plt.tight_layout()
plt.show()
I have a bar graph of 150 values.The code is :
# Let Matplotlib fit labels inside the figure automatically.
rcParams.update({'figure.autolayout': True})
plt.figure(figsize=(14, 9), dpi=600)

# resolution3 is a sequence of (name, value) pairs; split it into two lists.
reso_names = [entry[0] for entry in resolution3]
reso_values = [entry[1] for entry in resolution3]

# Only the first 20 entries are drawn in this chart.
shown_names = reso_names[0:20]
shown_values = reso_values[0:20]

plt.bar(range(len(shown_values)), shown_values, align='center')
plt.xticks(range(len(shown_names)), list(shown_names), rotation='vertical')
plt.margins(0.075)
plt.xlabel('Resolution Category Tier 3')
plt.ylabel('Volume')

title_font = {'family': 'Arial Black', 'weight': 'bold', 'size': 22}
plt.title('Resolution Category Tier 3 Volume', title_font)

plt.savefig('Reso3.pdf', format='pdf')
plt.show()
Since I want to break it down into sub-graphs of 20 each to maintain readability, I'm using the [0:20] slice on reso_names and reso_values (both lists).
However, the problem is that the scale is not maintained: each sub-graph has a very different scale, which makes the graphs inconsistent with one another. How can I set a scale that is maintained across all the graphs?
You can specify sharey=True to keep the y-scale same in all subplots.
import numpy as np
import matplotlib.pyplot as plt
# Two random series with different value ranges.
x = np.random.randint(1, 10, 10)
y = np.random.randint(1, 100, 10)

# sharey=True makes both panels use the same y-axis scale.
fig, axes = plt.subplots(1, 2, sharey=True)

# do simple plot here, replace barplot yourself
for panel, series in zip(axes, (x, y)):
    panel.plot(series)
Or if you prefer to plot them separately, you can manually configure ax.set_ylim().