I usually don't ask questions on this platform, but I have a problem that quite bugs me.
Context
I have a function that plots data from a dataframe that has stockdata. It all works perfectly except for the fact that a second, empty window shows next to the actual graph whenever I execute this function. (image)
Here is all the relevant code, I'd be very grateful if some smart people could help me.
def plot(self):
plt.clf()
plt.cla()
colors = Colors()
data = self.getStockData()
if data.empty:
return
data.index = [TimeData.fromTimestamp(x) for x in data.index]
current, previous = data.iloc[-1, 1], data.iloc[0, 1]
percentage = (current / previous - 1) * 100
# Create a table
color = colors.decideColorPct(percentage)
# Create the table
fig = plt.figure(edgecolor=colors.NEUTRAL_COLOR)
fig.patch.set_facecolor(colors.BACKGROUND_COLOR)
plt.plot(data.close, color=color)
plt.title(self.__str2__(), color=colors.NEUTRAL_COLOR)
plt.ylabel("Share price in $", color=colors.NEUTRAL_COLOR)
plt.xlabel("Date", color=colors.NEUTRAL_COLOR)
ax = plt.gca()
ax.xaxis.set_major_formatter(plt_dates.DateFormatter('%Y/%m/%d %H:%M'))
ax.set_xticks([data.index[0], data.index[-1]])
ax.set_facecolor(colors.BACKGROUND_COLOR)
ax.tick_params(color=colors.NEUTRAL_COLOR, labelcolor=colors.NEUTRAL_COLOR)
for spine in ax.spines.values():
spine.set_edgecolor(colors.NEUTRAL_COLOR)
ax.yaxis.grid(True, color=colors.NEUTRAL_COLOR, linestyle=(0, (5, 10)), linewidth=.5)
plt.show()
Some notes:
Matplotlib never gets used in the program before this.
The data is standardized and consists of the following columns: open, low, high, close, volume.
The index of the dataframe exists of timestamps, which gets converted to an index of datetime objects at the following line: data.index = [TimeData.fromTimestamp(x) for x in data.index]
Remove plt.clf() and plt.cla() because it automatically creates window for plot when you don't have this window.
And later fig = plt.figure() creates new window which it uses to display your plot.
Minimal code for test
import matplotlib.pyplot as plt
import pandas as pd
data = pd.DataFrame({'x': [1,2,3], 'y': [2,3,1]})
#plt.clf()
#plt.cla()
fig = plt.figure()
plt.plot(data)
ax = plt.gca()
plt.show()
Related
I have created a barplot for given days of the year and the number of people born on this given day (figure a). I want to set the x-axes in my seaborn barplot to xlim = (0,365) to show the whole year.
But, once I use ax.set_xlim(0,365) the bar plot is simply moved to the left (figure b).
This is the code:
#data
df = pd.DataFrame()
df['day'] = np.arange(41,200)
df['born'] = np.random.randn(159)*100
#plot
f, axes = plt.subplots(4, 4, figsize = (12,12))
ax = sns.barplot(df.day, df.born, data = df, hue = df.time, ax = axes[0,0], color = 'skyblue')
ax.get_xaxis().set_label_text('')
ax.set_xticklabels('')
ax.set_yscale('log')
ax.set_ylim(0,10e3)
ax.set_xlim(0,366)
ax.set_title('SE Africa')
How can I set the x-axes limits to day 0 and 365 without the bars being shifted to the left?
IIUC, the expected output given the nature of data is difficult to obtain straightforwardly, because, as per the documentation of seaborn.barplot:
This function always treats one of the variables as categorical and draws data at ordinal positions (0, 1, … n) on the relevant axis, even when the data has a numeric or date type.
This means the function seaborn.barplot creates categories based on the data in x (here, df.day) and they are linked to integers, starting from 0.
Therefore, it means even if we have data from day 41 onwards, seaborn is going to refer the starting category with x = 0, making for us difficult to tweak the lower limit of x-axis post function call.
The following code and corresponding plot clarifies what I explained above:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# data
rng = np.random.default_rng(101)
day = np.arange(41,200)
born = rng.integers(low=0, high=10e4, size=200-41)
df = pd.DataFrame({"day":day, "born":born})
# plot
f, ax = plt.subplots(figsize=(4, 4))
sns.barplot(data=df, x='day', y='born', ax=ax, color='b')
ax.set_xlim(0,365)
ax.set_xticks(ticks=np.arange(0, 365, 30), labels=np.arange(0, 365, 30))
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()
I suggest using matplotlib.axes.Axes.bar to overcome this issue, although handling colors of the bars would be not straightforward compared to sns.barplot(..., hue=..., ...) :
# plot
f, ax = plt.subplots(figsize=(4, 4))
ax.bar(x=df.day, height=df.born) # instead of sns.barplot
ax.get_xaxis().set_label_text('')
ax.set_xlim(0,365)
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()
I have gathered a code to make plots from data from multiple days. I have a data file containing over 40 days and 19k timestamps, and I need a plot, one for each day. I want python to generate them as different plots.
Mr. T helped me a lot with providing the code, but I cannot manage the code to get it to plot individual plots instead of all in one subplot. Can somebody help me with this?
Picture shows the current output:
My code:
import matplotlib.pyplot as plt
import numpy as np
#read your data and create datetime index
df= pd.read_csv('test-februari.csv', sep=";")
df.index = pd.to_datetime(df["Date"]+df["Time"].str[:-5], format="%Y:%m:%d %H:%M:%S")
#group by date and hour, count entries
dfcounts = df.groupby([df.index.date, df.index.hour]).size().reset_index()
dfcounts.columns = ["Date", "Hour", "Count"]
maxcount = dfcounts.Count.max()
#group by date for plotting
dfplot = dfcounts.groupby(dfcounts.Date)
#plot each day into its own subplot
fig, axs = plt.subplots(dfplot.ngroups, figsize=(6,8))
for i, groupdate in enumerate(dfplot.groups):
ax=axs[i]
#the marker is not really necessary but has been added in case there is just one entry per day
ax.plot(dfplot.get_group(groupdate).Hour, dfplot.get_group(groupdate).Count, color="blue", marker="o")
ax.set_title(str(groupdate))
ax.set_xlim(0, 24)
ax.set_ylim(0, maxcount * 1.1)
ax.xaxis.set_ticks(np.arange(0, 25, 2))
plt.tight_layout()
plt.show()
Welcome to the Stackoverflow.
Instead of creating multiple subplots, you can create a figure on the fly and plot onto it in every loop separately. And at the end show all of them at the same time.
for groupdate in dfplot.groups:
fig = plt.figure()
plt.plot(groupdate.Hour, groupdate.Count, color="blue", marker="o")
plt.title(str(groupdate))
plt.xlim(0, 24)
plt.ylim(0, maxcount * 1.1)
plt.xticks(np.arange(0, 25, 2))
plt.tight_layout()
plt.show()
I am trying to write a define function to plot a line graph by the data of a imported a csv file.
This a small sample of my data( temperature reading for every minutes):-
00:01:00.0305040, 35.35985
00:02:00.0438094, 35.48547
00:03:00.0571148, 35.65295
00:04:00.0704203, 35.90417
00:05:00.0837257, 36.23914
.
.
.
.
08:52:07.2370729, 74.92772
08:53:07.2503783, 75.01146
08:54:07.2648837, 75.05333
08:55:07.2781891, 75.0952
08:56:07.2914945, 75.0952
When I try to set the x ticker to be appear every hour, they do not show up in the plotted graph.
This is my code
df = pd.read_csv(file,names=["time", "temp"])
df["time"]=pd.to_datetime(df["time"])
df=df.set_index('time')
df.index = df.index.map (lambda t: t.strftime('%H:%M'))
print(df)
fig, ax = plt.subplots()
df.plot(ax = ax, color = 'black', linewidth = 0.4, x_compat=True)
ax.set(xlabel='Time (Hour:Minutes)', ylabel='Temperature (Celsius)')
ax.xaxis.set_major_locator(mdates.HourLocator(interval = 1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate()
return plt.show()
I have tried labeling the x tickers manually
plt.xticks(['0:00', '1:00', '2:00', '3:00', '4:00:0', '5:00', '6:00:0', '7:00', '8:00', '9:00', '10:00'])
and it worked, but it there a way for any given case?
According to the official documentation
All of plotting functions expect np.array or np.ma.masked_array as input. Classes that are 'array-like' such as pandas data objects and np.matrix may or may not work as intended. It is best to convert these to np.array objects prior to plotting.
So I changed your code slightly (basically converted the pd df into numpy array).
df = pd.read_csv(file,names=["time", "temp"])
df["time"]=pd.to_datetime(df["time"])
x_axis = np.array(df.time.values)
y_axis = np.array(df.temp.values)
fig, ax = plt.subplots()
ax.plot(x_axis,y_axis)
ax.set(xlabel='Time (Hour:Minutes)', ylabel='Temperature (Celsius)')
ax.xaxis.set_major_locator(mdates.HourLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
plt.show()
The ticks are visible now as below.
I'm working with a dictionary of values which have a string (date) and float for time in milliseconds. I want to present the data in a bar graph and also with a table below. I have the bar graph working but the table gets messed up. I want the dates as columns and time as a single row.
The dictionary is something like:
time_and_dates_for_plot = {'04-26': 488.1063166666667, '04-27': 289.7289333333333, '04-28': 597.2343999999999, '04-29': 0, '04-30': 0, '05-01': 1061.958075}
plot.bar(range(len(time_and_dates_for_plot)), time_and_dates_for_plot.values(), align='center')
plot.xticks(range(len(time_and_dates_for_plot)), list(time_and_dates_for_plot.keys()))
plot.xlabel('Date (s)')
plot.ylabel('milliseconds')
plot.grid(True)
plot.gca().set_position((.1, .3, .8, .6))
col_labels = list(time_and_dates_for_plot.keys())
print(col_labels)
row_labels = ['ms']
cell_text = []
val = []
for key in time_and_dates_for_plot.keys():
val.append((time_and_dates_for_plot.get(key)))
cell_text.append(val)
val = []
print(cell_text)
plot.table(cellText=cell_text, colLabels=col_labels)
plot.show()
As you can see from the picture, I get all entries under one column where as I want something like one cell data under one coloumn (just tabulate plot data).
Also, how do I add some padding between the table and graph?
First time I'm using matplotlib and pretty sure I'm missing something. Any help is really appreciated.
In the table function you need an extra pair of brackets []. ...cellText=[cell_text]...
Also, you can use subplots to have a better arrangement of the plots. Here, my solution uses subplots of 2 rows withheight_ratiosof 8 to 1, and ahspace` pf 0.3
import matplotlib as mpl
import matplotlib.pyplot as plt
time_and_dates_for_plot = {'04-26': 488.1063166666667,
'04-27': 289.7289333333333,
'04-28': 597.2343999999999,
'04-29': 0,
'04-30': 0,
'05-01': 1061.958075}
fig,axs = plt.subplots(figsize=(8,5),ncols=1,nrows=2,
gridspec_kw={'height_ratios':[8,1],'hspace':0.3})
ax = axs[0]
ax.bar(range(len(time_and_dates_for_plot)),
time_and_dates_for_plot.values(), align='center')
ax.set_xticks(range(len(time_and_dates_for_plot)),
list(time_and_dates_for_plot.keys()))
ax.set_xlabel('Date (s)')
ax.set_ylabel('milliseconds')
ax.grid(True)
col_labels = list(time_and_dates_for_plot.keys())
row_labels = ['ms']
cell_text = []
for key in time_and_dates_for_plot.keys():
cell_text += [time_and_dates_for_plot[key]]
ax = axs[1]
ax.set_frame_on(False) # turn off frame for the table subplot
ax.set_xticks([]) # turn off x ticks for the table subplot
ax.set_yticks([]) # turn off y ticks for the table subplot
ax.table(cellText=[cell_text], colLabels=col_labels, loc='upper center')
plt.show()
The output looks like:
** UPDATE **
Using only one subplot, no xticklabels, sorted dates, nicer numbers with %g, and larger table cells using bbox :
import matplotlib as mpl
import matplotlib.pyplot as plt
time_and_dates_for_plot = {'04-26': 488.1063166666667,
'04-27': 289.7289333333333,
'04-28': 597.2343999999999,
'04-29': 0,
'04-30': 0,
'05-01': 1061.958075}
N = len(time_and_dates_for_plot)
colLabels = sorted(time_and_dates_for_plot.keys())
fig,ax = plt.subplots()
aa = ax.bar(range(N),[time_and_dates_for_plot[x] for x in colLabels],
align='center')
ax.set_xlabel('Date')
ax.set_ylabel('milliseconds')
ax.set_xticklabels([]) # turn off x ticks
ax.grid(True)
fig.subplots_adjust(bottom=0.25) # making some room for the table
cell_text = []
for key in colLabels:
cell_text += ["%g"%time_and_dates_for_plot[key]]
ax.table(cellText=[cell_text], colLabels=colLabels,
rowLabels=['ms'],cellLoc='center',
bbox=[0, -0.27, 1, 0.15])
ax.set_xlim(-0.5,N-0.5) # Helps having bars aligned with table columns
ax.set_title("milliseconds vs Date")
fig.savefig("Bar_graph.png")
plt.show()
Output:
** Update: Making room for the table using subplots_adjust **
I need to create a chart, that has a grid like in the following picture.
The key factors being:
The x-axis is time with each tick marking 30 seconds
y-axes labels in the chart repeat at a variable interval
Chart must grow with the amount of data (i.e. for 30 minutes of data, it should be 60 boxes wide)
I have been looking into matplotlib for a bit, and it seems promising. I also managed to fill the chart with data. See my result for 40 Minutes of data.
But before I invest more time into research, I must know if this goal is even possible. If not I'll have to look into other charts. Thanks for your help!
Here is the source for the above image (my_data is actually read from a csv, but filled with random junk here):
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(50, 200), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
plt.ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2) # second plot
plt.ylim(0, 100)
actg.xaxis.set_minor_locator(dates.MinuteLocator())
actg.xaxis.set_major_formatter(hfmt)
atoco.xaxis.set_minor_locator(dates.MinuteLocator())
atoco.xaxis.set_major_formatter(hfmt)
plt.xticks(rotation=45)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in fig.axes:
ax.grid(True)
plt.tight_layout()
plt.show()
OK, here's something close to what you are after, I think.
I've used dates.SecondLocator(bysecond=[0,30]) to set the grid every 30 seconds (also need to make sure the grid is set on the minor ticks, with ax.xaxis.grid(True,which='both')
To repeat the yticklabels, I create a twinx of the axes for every major tick on the xaxis, and move the spine to that tick's location. I then set the spine color to none, so it doesn't show up, and turn of the actual ticks, but not the tick labels.
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
# how often to show xticklabels and repeat yticklabels:
xtickinterval = 10
# Make random data
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(120, 160), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
actg.set_ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2,sharex=actg) # second plot, share the xaxis with actg
atoco.set_ylim(-5, 105)
# Set the major ticks to the intervals specified above.
actg.xaxis.set_major_locator(dates.MinuteLocator(byminute=np.arange(0,60,xtickinterval)))
# Set the minor ticks to every 30 seconds
minloc = dates.SecondLocator(bysecond=[0,30])
minloc.MAXTICKS = 3000
actg.xaxis.set_minor_locator(minloc)
# Use the formatter specified above
actg.xaxis.set_major_formatter(hfmt)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
print times[-1]-times[0]
# Make your plot
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in [actg,atoco]:
# Turn off the yticklabels on the right hand side
ax.set_yticklabels([])
# Set the grids
ax.xaxis.grid(True,which='both',color='r')
ax.yaxis.grid(True,which='major',color='r')
# Create new yticklabels every major tick on the xaxis
for tick in ax.get_xticks():
tx = ax.twinx()
tx.set_ylim(ax.get_ylim())
tx.spines['right'].set_position(('data',tick))
tx.spines['right'].set_color('None')
for tic in tx.yaxis.get_major_ticks():
tic.tick1On = tic.tick2On = False
plt.tight_layout()
plt.show()