How to change the density of x-tick in Matplotlib chart? - python

I plot a chart with matplotlib but the x-ticks are too crowded. May I know any solution to fix it?
from pandas_datareader import data
import datetime
tickers = 'AAPL'
dateToday = datetime.datetime.today().strftime("%Y-%m-%d")#年月日20190526
# Only get the adjusted close.
tickers_data = data.DataReader(tickers,
start='',
end=dateToday,
data_source='yahoo')[["Adj Close", "Volume"]][-250:]
returns = tickers_data.pct_change()
plt.figure(figsize=(12,6))
ax = sns.barplot(x=returns.index.strftime('%d/%-m'), y=returns['Adj Close'], color='#73a9d1')
plt.xticks(rotation = 90)
plt.title('Returns' + '\n' + tickers)
Output:

If you want to see, for example, every fifth x-tick (x-tick step is 5), you can improve your code in this way:
step = 5
x_values = returns.index.strftime('%d/%-m')
x_ticks_values = x_values[::step]
plt.figure(figsize = (12, 6))
ax = sns.barplot(x = x_values,
y = returns['Adj Close'],
color = '#73a9d1')
plt.xticks(ticks = np.arange(0, (len(x_values) + step), step),
labels = x_ticks_values,
rotation = 90)

Related

Seaborn Align twinx and x Axis

I am trying to align X axis with its twin but I'm not finding a way to do it.
Here is my code
# Initialize the figure
plt.figure(figsize=(16, 10))
# Adding a title
plt.title(f'Client Retention Quarters: Monthly Cohorts', fontsize = 14)
# Creating the heatmap
sns.heatmap(retention, annot = True,vmin = 0, vmax =30,cmap="flare", fmt='g')
plt.ylabel('Cohort Quarter')
plt.xlabel('')
plt.yticks( rotation='360')
#Twinx
ax2 = plt.twiny()
ax2.set_xticks(range(0,len(x2)))
ax2.set_xticklabels(labels=x2)
ax2.spines['top'].set_position(('axes', -0.10))
plt.show()
And here is the output:
I want to align the percentages with the x ticks.
Is it possible?
You can use the below updated code. See if this works. Note that I have used random data for retention and x2. Basically, the main change it to get the xlim()s for both axes and then adjust it (see lambda f) so that the ticks align. Finally use set_major_locator() to fix the points. Hope this is what you are looking for...
retention = np.random.rand(10, 12) ##My random data
# Initialize the figure
plt.figure(figsize=(16, 10))
# Adding a title
plt.title(f'Client Retention Quarters: Monthly Cohorts', fontsize = 14)
# Creating the heatmap
ax=sns.heatmap(retention, annot = True,vmin = 0, vmax =30,cmap="flare", fmt='g') ## Note I am assigning to ax
plt.ylabel('Cohort Quarter')
plt.xlabel('')
plt.yticks( rotation='360')
x2 = np.around(np.linspace(1, 25, 12),2)
#Twinx
ax2 = ax.twiny()
#ax2.set_xticks(range(0,len(x2))) ## Commented as not required
#ax2.set_xticklabels(labels=x2) ## Commented as not required
## New code here ##
import matplotlib.ticker
l = ax.get_xlim()
l2 = ax2.get_xlim()
f = lambda y : l2[0]+(y-l[0])/(l[1]-l[0])*(l2[1]-l2[0]) ##Add delta to each tick
ticks = f(ax.get_xticks())
ax2.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks)) ##Set the ticks
ax2.spines['top'].set_position(('axes', -0.10))
plt.show()

Show dates in xticks only where value exist in plot chart of multiple dataframes

I have got a matplotlib question about xticks. I wanted to hide all those values that do not occur. I actually did it, but for the second set of values (red chart). I found how to hide for a specific data frame but not for 2 or more.
This is my code:
plt.subplots(figsize=(2, 1), dpi=400)
width = 0.005
xlim = np.arange(0, 1, 0.01)
ylim = np.arange(0, 0.1, 0.001)
plt.xticks(density_2.index.unique(), rotation=90, fontsize=1.5)
plt.yticks(density_2.unique(), fontsize=2)
plt.bar(density_1.index, density_1, width, color='Green', label=condition_1,alpha=0.5)
plt.bar(density_2.index, density_2, width, color='Red', label=condition_2,alpha=0.5)
plt.legend(loc="upper right", fontsize=2)
plt.show()
Link where I saw the solution: show dates in xticks only where value exist in plot chart and hide unnecessary interpolated xtick labels
Thank you very much in advance!
You need to find the intersection of the two lists of density_1's and density_2's ticks, as reported here.
Working example:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
N = 150
values_1 = np.random.randint(low = 5, high = 75, size = N)/100
density_1 = pd.DataFrame({'density_1': values_1})
density_1 = density_1.value_counts().sort_index(ascending = True)
density_1.index = sorted(list(set(values_1)), reverse = False)
values_2 = np.random.randint(low = 35, high = 100, size = N)/100
density_2 = pd.DataFrame({'density_2': values_2})
density_2 = density_2.value_counts().sort_index(ascending = True)
density_2.index = sorted(list(set(values_2)), reverse = False)
width = 0.005
condition_1 = 'Adele'
condition_2 = 'Extremoduro'
fig, ax = plt.subplots(figsize = (10, 5))
ax.bar(density_1.index, density_1, width, color = 'Green', label = condition_1, alpha = 0.5)
ax.bar(density_2.index, density_2, width, color = 'Red', label = condition_2, alpha = 0.5)
ax.legend(loc = 'upper right')
ax.set_xticks(list(set(density_1.index.unique()) & set(density_2.index.unique())), rotation = 90)
plt.show()
In the line:
list(set(density_1.index.unique()) & set(density_2.index.unique()))
you can select ticks which blongs to both density_1 and density_2.
Zoom in:

plt.CLA or CLF in animations - Why does it not work for me to only show most recent plot?

I want my animation only to show the most previous point, and I believe that I have to adjust something around this line: plt.gca().cla()
Can anyone tell me what I am doing wrong? In my animation, all the points stay visible, while I only want to show the most previous points. Any suggestions?
This is my code:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
title = 'Occ'
x = np.array(df.x)
y = np.array(df.y)
Writer = animation.writers['ffmpeg']
writer = Writer(fps = 4, bitrate = 1800)
fig = plt.figure(figsize = (12, 8))
def animate(i):
plt.gca().cla()
data = df.iloc[:int(i + 1)] # select data range
p = sns.scatterplot(x = 'x', y = 'y', hue = 'id', data = data, s = 200, alpha = 0.5)
p.tick_params(labelsize = 17)
plt.setp(p.lines, linewidth = 7)
plt.xlim(0, 500)
plt.ylim(0, 500)
plt.xlabel('X', fontsize = 20)
plt.ylabel('Y', fontsize = 20)
plt.title('Occ', fontsize = 20)
ani = matplotlib.animation.FuncAnimation(fig, animate, frames = len(df), repeat = True, blit=False)
ani.save('Occ.mp4', writer = writer)
The line
data = df.iloc[:int(i + 1)] # select data range
select all the rows from 0 to i+1, therefore you are showing a growing number of points at each frame. If you want to show only the current point, you should do:
data = df.iloc[i] # select data range

Using Hlines ruins legends in Matplotlib

I'm struggling to adjust my plot legend after adding the axline/ hline on 100 level in the graph.(screenshot added)
if there's a way to run this correctly so no information will be lost in legend, and maybe add another hline and adding it to the legend.
adding the code here, maybe i'm not writing it properly.
fig, ax1 = plt.subplots(figsize = (9,6),sharex=True)
BundleFc_Outcome['Spend'].plot(kind = 'bar',color = 'blue',width = 0.4, ax = ax1,position = 1)
#
# Make the y-axis label, ticks and tick labels match the line color.
ax1.set_ylabel('SPEND', color='b', size = 18)
ax1.set_xlabel('Bundle FC',color='w',size = 18)
ax2 = ax1.twinx()
ax2.set_ylabel('ROAS', color='r',size = 18)
ax1.tick_params(axis='x', colors='w',size = 20)
ax2.tick_params(axis = 'y', colors='w',size = 20)
ax1.tick_params(axis = 'y', colors='w',size = 20)
#ax1.text()
#
ax2.axhline(100)
BundleFc_Outcome['ROAS'].plot(kind = 'bar',color = 'red',width = 0.4, ax = ax2,position = 0.25)
plt.grid()
#ax2.set_ylim(0, 4000)
ax2.set_ylim(0,300)
plt.title('ROAS & SPEND By Bundle FC',color = 'w',size= 20)
plt.legend([ax2,ax1],labels = ['SPEND','ROAS'],loc = 0)
The code gives me the following picture:
After implementing the suggestion in the comments, the picture looks like this (does not solve the problem):
You can use bbox_to_anchor attribute to set legend location manually.
ax1.legend([ax1],labels = ['SPEND'],loc='upper right', bbox_to_anchor=(1.25,0.70))
plt.legend([ax2,ax1],labels = ['SPEND','ROAS'],loc='upper right', bbox_to_anchor=(1.25,0.70))
https://matplotlib.org/users/legend_guide.html#legend-location
So finally figured it out , was simpler for a some reason
Even managed to add another threshold at level 2 for minimum spend.
fig, ax1 = plt.subplots(figsize = (9,6),sharex=True)
BundleFc_Outcome['Spend'].plot(kind = 'bar',color = 'blue',width = 0.4, ax = ax1,position = 1)
#
# Make the y-axis label, ticks and tick labels match the line color.
ax1.set_ylabel('SPEND', color='b', size = 18)
ax1.set_xlabel('Region',color='w',size = 18)
ax2 = ax1.twinx()
ax2.set_ylabel('ROAS', color='r',size = 18)
ax1.tick_params(axis='x', colors='w',size = 20)
ax2.tick_params(axis = 'y', colors='w',size = 20)
ax1.tick_params(axis = 'y', colors='w',size = 20)
#ax1.text()
#
BundleFc_Outcome['ROAS'].plot(kind = 'bar',color = 'red',width = 0.4, ax = ax2,position = 0.25)
plt.grid()
#ax2.set_ylim(0, 4000)
ax2.set_ylim(0,300)
plt.title('ROAS & SPEND By Region',color = 'w',size= 20)
fig.legend([ax2,ax1],labels = ['SPEND','ROAS'],loc = 0)
plt.hlines([100,20],xmin = 0,xmax = 8,color= ['r','b'])
I don't recommend using the builtin functions of pandas to do more complex plotting. Also when asking a question it is common courtesy to provide a minimal and verifiable example (see here). I took the liberty to simulate your problem.
Due to the change in axes, we need to generate our own legend. First the results:
Which can be achieved with:
import matplotlib.pyplot as plt, pandas as pd, numpy as np
# generate dummy data.
X = np.random.rand(10, 2)
X[:,1] *= 1000
x = np.arange(X.shape[0]) * 2 # xticks
df = pd.DataFrame(X, columns = 'Spend Roast'.split())
# end dummy data
fig, ax1 = plt.subplots(figsize = (9,6),sharex=True)
ax2 = ax1.twinx()
# tmp axes
axes = [ax1, ax2] # setup axes
colors = plt.cm.tab20(x)
width = .5 # bar width
# generate dummy legend
elements = []
# plot data
for idx, col in enumerate(df.columns):
tax = axes[idx]
tax.bar(x + idx * width, df[col], label = col, width = width, color = colors[idx])
element = tax.Line2D([0], [0], color = colors[idx], label = col) # setup dummy label
elements.append(element)
# desired hline
tax.axhline(200, color = 'red')
tax.set(xlabel = 'Bundle FC', ylabel = 'ROAST')
axes[0].set_ylabel('SPEND')
tax.legend(handles = elements)

Remove gaps between plotted Python Matplotlib candlestick data

I'd like to be able to plot stock data on my chart without any gaps where data is missing.
This answer had a way of doing it like this:
# the dates in my example file-set are very sparse (and annoying) change the dates to be sequential
for i in range(len(r)-1):
r['date'][i+1] = r['date'][i] + datetime.timedelta(days=1)
I didn't understand how or why that worked but tried to implement it in my code like this:
for i in range(len(date)-1):
date[i+1] = date[i] + dt.timedelta(days=1)
Here is the code for my chart:
fig = plt.figure()
fig.set_size_inches(13.5, 8.5)
ax1 = plt.subplot2grid((6,1), (0,0), rowspan=4, colspan=1)
ax1.yaxis.tick_right()
ax1.xaxis.set_ticks_position('bottom')
plt.title('title')
# Add a seconds axis for the volume overlay
ax2 = plt.subplot2grid((6,1), (4,0), rowspan=4, colspan=1)
ax2.xaxis.set_ticks_position('bottom')
stock_price_url = 'https://www.quandl.com/api/v3/datasets/WIKI/AAPL/data.csv?start_date=2015-06-01&order=asc&end_date=2015-08-01&collapse=daily'
source_code = urllib.urlopen(stock_price_url).read().decode()
stock_data = []
split_source = source_code.split('\n')
for line in split_source:
split_line = line.split(',')
if 'Date' not in line:
stock_data.append(line)
date, openp, highp, lowp, closep, volume = np.loadtxt(stock_data,
delimiter=',',
unpack=True,
converters={0:strpdate2num('%Y-%m-%d')},
usecols=(0,1,2,3,4,5))
x = 0
y = len(date)
ohlc = []
while x < y:
append_me = date[x], openp[x], closep[x], highp[x], lowp[x], volume[x]
ohlc.append(append_me)
x+=1
candlestick(ax1, ohlc, width=0.4, colorup='g', colordown='r')
# create the second axis for the volume bar-plot
ax2.yaxis.tick_right()
ax2.yaxis.get_major_formatter().set_scientific(False)
# set the position of ax2 so that it is short (y2=0.32) but otherwise the same size as ax
ax2.set_position(mpl.transforms.Bbox([[0.125,0.1],[0.9,0.32]]))
# get data from candlesticks for a bar plot
dates = [x[0] for x in ohlc]
dates = np.asarray(dates)
volume = [x[5] for x in ohlc]
volume = np.asarray(volume)
# make bar plots and color differently depending on up/down for the day
pos = openp-closep<0
neg = openp-closep>0
ax2.bar(dates[pos],volume[pos],color='green',width=1,align='center')
ax2.bar(dates[neg],volume[neg],color='red',width=1,align='center')
#scale the x-axis tight
ax1.set_xlim(min(dates),max(dates))
ax2.set_xlim(min(dates),max(dates))
# the y-ticks for the bar were too dense, keep only every third one
yticks = ax2.get_yticks()
ax2.set_yticks(yticks[::3])
# format the x-ticks with a human-readable date.
xt = ax1.get_xticks()
new_xticks = [dt.date.isoformat(num2date(d)) for d in xt]
ax1.set_xticklabels(new_xticks,rotation=45, horizontalalignment='right')
ax2.set_xticklabels(new_xticks,rotation=45, horizontalalignment='right')
plt.show()
Any help would be really awesome.

Categories