I've made a function to graph economic performance, but the output is often lopsided on the y-axis.
The below graph shows the problem. The range of y values makes the chart default to the max/min as the range of the y axis.
Is there any way to force the chart to center itself on 0, or do I need to derive the max and min y values within the function?
The function is below. If you'd like me to replace the variables with concrete values so you can reproduce the chart, let me know; it's a bit of a task.
def recession_comparison(key, variable, dimension):
    '''
    Creates the "scary chart"- proportional growth for a single area/industry. All recessions included in chart.

    Parameters:
        key (str or int): area-fips or industry_code
        variable (str): determines what economic indicator will be used in the timeline. Must be one of ['month3_emplvl' (employment), 'avg_wkly_wage' (wages), 'qtrly_estabs_count'(firms)]
        dimension (str): dimension of data to chart. Must be 'area' or 'industry'.

    Returns:
        fig (matplotlib plot)

    Raises:
        ValueError: if dimension is neither 'area' nor 'industry'.
    '''
    # Reject an unknown dimension before any figure is created; otherwise
    # `index` and `title` are never assigned and the function fails further
    # down with an UnboundLocalError, leaving an empty figure behind.
    if dimension not in ('area', 'industry'):
        raise ValueError("dimension must be 'area' or 'industry', got " + repr(dimension))

    fig, ax = plt.subplots(figsize =(15, 10))
    if dimension == 'area':
        index = 'area_fips'
        title = 'Recession Comparison, ' + area_titles[key] + " (" + str(key) + ")"
    else:
        index = 'industry_code'
        title = 'Recession Comparison: ' + industry_titles[key] + " (" + str(key) + ")"

    for recession in recessions_int:
        # Stop at the 'full' entry. NOTE(review): this assumes 'full' comes
        # after every individual recession in recessions_int - confirm.
        if recession == 'full':
            break
        loadpath = filepath(variable = variable, dimension = dimension, charttype = 'proportional', recession = recession, filetype = 'json')
        df = pd.read_json(loadpath)
        df.set_index(index, inplace = True)
        # Drop the first and last entries of the row and scale to percent.
        ax.plot(df.loc[key][1:-1]*100, label = str(recession), linewidth = 1.5, alpha = 0.8)

    # Reference lines: the event quarter (x = 6) and the zero-growth baseline.
    ax.axvline(x = 6, color = 'black', linewidth = 0.8, alpha = 0.5, ls = ':', label = 'Event Quarter')
    ax.axhline(y = 0, color = 'black', linewidth = 0.8, alpha = 0.5, ls = '--', label = 'Pre-Recession baseline')
    ax.set_xlabel('Quarters since start of recession')
    ax.set_ylabel('Growth: ' + var_display[variable])
    ax.set_title(title)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
    ax.legend()
    plt.show()
    return fig
edit: full code solution from DapperDuck:
def recession_comparison(key, variable, dimension):
    '''
    Plots proportional growth for a single area/industry across all recessions,
    with the y-axis centred on 0.

    Parameters:
        key (str or int): area-fips or industry_code
        variable (str): economic indicator to chart; used to build the data path and the y-axis label.
        dimension (str): dimension of data to chart. Must be 'area' or 'industry'.

    Returns:
        fig (matplotlib plot)

    Raises:
        ValueError: if dimension is neither 'area' nor 'industry'.
    '''
    # Reject an unknown dimension before any figure is created; otherwise
    # `index` and `title` are never assigned and the function fails further
    # down with an UnboundLocalError, leaving an empty figure behind.
    if dimension not in ('area', 'industry'):
        raise ValueError("dimension must be 'area' or 'industry', got " + repr(dimension))

    fig, ax = plt.subplots(figsize =(15, 10))
    if dimension == 'area':
        index = 'area_fips'
        title = 'Recession Comparison, ' + area_titles[key] + " (" + str(key) + ")"
    else:
        index = 'industry_code'
        title = 'Recession Comparison: ' + industry_titles[key] + " (" + str(key) + ")"

    for recession in recessions_int:
        # Stop at the 'full' entry. NOTE(review): this assumes 'full' comes
        # after every individual recession in recessions_int - confirm.
        if recession == 'full':
            break
        loadpath = filepath(variable = variable, dimension = dimension, charttype = 'proportional', recession = recession, filetype = 'json')
        df = pd.read_json(loadpath)
        df.set_index(index, inplace = True)
        # Drop the first and last entries of the row and scale to percent.
        ax.plot(df.loc[key][1:-1]*100, label = str(recession), linewidth = 1.5, alpha = 0.8)

    # Reference lines: the event quarter (x = 6) and the zero-growth baseline.
    ax.axvline(x = 6, color = 'black', linewidth = 0.8, alpha = 0.5, ls = ':', label = 'Event Quarter')
    ax.axhline(y = 0, color = 'black', linewidth = 0.8, alpha = 0.5, ls = '--', label = 'Pre-Recession baseline')
    # Centre the y-axis on 0: take whichever current limit is farther from
    # zero and mirror it. This must run after everything has been plotted.
    yabs_max = abs(max(ax.get_ylim(), key=abs))
    ax.set_ylim(ymin=-yabs_max, ymax=yabs_max)
    ax.set_xlabel('Quarters since start of recession')
    ax.set_ylabel('Growth: ' + var_display[variable])
    ax.set_title(title)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
    ax.legend()
    plt.show()
    return fig
Corrected image:
Add the following code right after ax.axhline(y = 0, color = 'black', linewidth = 0.8, alpha = 0.5, ls = '--', label = 'Pre-Recession baseline'):
yabs_max = abs(max(ax.get_ylim(), key=abs))
ax.set_ylim(ymin=-yabs_max, ymax=yabs_max)
Related
I tried to label the coefficient and the confidence interval using the following code:
# Coefficient plot built from `leadslags_plot`: x = 'label', y = 'mean', with
# the interval bounds taken from 'lb' and 'ub'. It stacks a line, point-ranges
# and error bars, then adds dashed reference lines at y = 0 and x = 0.
# Every colour is passed as a fixed layer parameter (color = "b" / "r") rather
# than mapped inside p.aes(), while scale_color_manual supplies the names
# "coeff" and "95 percent CI". Per the question, the plot renders but those
# two names are not shown.
pp =p.ggplot(leadslags_plot, p.aes(x = 'label', y = 'mean',
ymin = 'lb',
ymax = 'ub')) +\
p.geom_line(p.aes(group = 1),color = "b") +\
p.geom_pointrange(color = "b",size = 0.5) +\
p.geom_errorbar(color = "r", width = 0.2) +\
p.scale_color_manual(name= "label:", values = ['b','r'],labels = ["coeff","95 percent CI"] )+\
p.theme("bottom") +\
p.xlab("Years before and after ") +\
p.ylab("value ") +\
p.geom_hline(yintercept = 0,
linetype = "dashed") +\
p.geom_vline(xintercept = 0,
linetype = "dashed")
The code generates the plot but does not label 'coeff' and 'CI'. How can I add labels for 'coeff' and 'CI'?
The issue is that, to get a legend, you have to map a variable onto an aesthetic. In ggplot2 (the R package) this could easily be achieved by moving color="b" inside aes(); that, however, does not work in plotnine (Python). Maybe there is a more Pythonic way to get around this issue, but one option is to add two helper columns to your dataset, which can then be mapped onto the color aesthetic:
import pandas as pd
import plotnine as p
# Small example frame: one row per x position with the point estimate and
# the lower/upper bounds of its interval.
leadslags_plot = [[-2, 1, 0, 2], [0, 2, 1, 3], [2, 3, 2, 4]]
leadslags_plot = pd.DataFrame(leadslags_plot, columns=['label', 'mean', 'lb', 'ub'])
# Helper columns holding one constant value each. Mapping them onto the
# colour aesthetic is what gives scale_color_manual something to label.
leadslags_plot["b"] = "b"
leadslags_plot["r"] = "r"

# The whole expression is parenthesised, so no backslash continuations are
# needed between the layers.
(
    p.ggplot(leadslags_plot, p.aes(x='label', y='mean', ymin='lb', ymax='ub'))
    # The line keeps a fixed colour, so it does not get a legend entry.
    + p.geom_line(p.aes(group=1), color="b")
    + p.geom_pointrange(p.aes(color="b"), size=0.5)
    + p.geom_errorbar(p.aes(color="r"), width=0.2)
    + p.scale_color_manual(name="label:", values=['b', 'r'], labels=["coeff", "95 percent CI"])
    # "bottom" has to be passed as legend_position; given positionally it is
    # bound to theme()'s first parameter, `complete`, instead.
    + p.theme(legend_position="bottom", subplots_adjust={'right': 0.8})
    + p.xlab("Years before and after ")
    + p.ylab("value ")
    + p.geom_hline(yintercept=0, linetype="dashed")
    + p.geom_vline(xintercept=0, linetype="dashed")
)
# Read the 'Dose 1' value for every state/UT from the CSV. The reader is
# consumed inside the `with` block, while the file is still open.
with open('data/covid.csv', encoding="utf8") as file_obj:
    file_data = csv.DictReader(file_obj, skipinitialspace = True)
    # storing info of dose 1 in dictionary; if a state/UT appears more than
    # once, the last row wins (both branches of the original if/else did this)
    dicti = {row['State/UTs']: row['Dose 1'] for row in file_data}
print(dicti)

# The CSV values are strings; the bar heights need to be numbers.
valuesL = [int(value) for value in dicti.values()]

plt.figure(figsize=(18,12))
# NOTE: plt.bar returns the container of bars, not an Axes object.
ax = plt.bar(list(dicti.keys()), valuesL, color = "green")
plt.xlabel("State/uts", fontsize = 30)
plt.ylabel("Number of people who have taken dose 1", fontsize = 30)
plt.title('States VS people who have taken dose 1', fontsize = 30)
plt.xticks(rotation=80)
# Write each bar's height just above the top of the bar.
for patch in ax.patches:
    height = patch.get_height()
    plt.text(patch.get_x(), patch.get_y() + height + 4, '{}'.format(height))
plt.show()
This is my code. The graph shows the y-axis ranging from 0.5 to 1; I want it to follow the values in the dictionary, which range from 10000 to 10000000.
It seems like you're looking for set_ylim.
bottom = 10000
top = 10000000
ax.set_ylim(bottom, top)
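or, because `ax` in the question's code is the object returned by plt.bar rather than an Axes, use the pyplot function: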
plt.ylim(bottom, top)
I would like to add a label to a line in plotnine. I get the following error when using geom_text:
'NoneType' object has no attribute 'copy'
Sample code below:
# Sample data: 100 quarterly dates starting in 1996 (4 * 25 periods) and two
# constant series, 'small' (0.035) and 'large' (0.09), repeated to match.
df = pd.DataFrame({
'date':pd.date_range(start='1/1/1996', periods=4*25, freq='Q'),
'small': pd.Series([0.035]).repeat(4*25) ,
'large': pd.Series([0.09]).repeat(4*25),
})
# Two step lines on a year-labelled x-axis and a percent-labelled y-axis,
# with both axis titles removed.
fig1 = (ggplot()
+ geom_step(df, aes(x='date', y='small'))
+ geom_step(df, aes(x='date', y='large'))
+ scale_x_datetime(labels=date_format('%Y'))
+ scale_y_continuous(labels=lambda l: ["%d%%" % (v * 100) for v in l])
+ labs(x=None, y=None)
# This layer is given no data and maps the label inside aes(); per the post,
# it is where "'NoneType' object has no attribute 'copy'" is raised.
+ geom_text(aes(x=pd.Timestamp('2000-01-01'), y = 0.0275, label = 'small'))
)
print(fig1)
Edit:
has2k1's answer below solves the error, but I get:
I want this: (from R)
R code:
# Reference ggplot2 version of the target chart: two coloured step lines,
# a percent-formatted y-axis limited to 0-0.125, no axis titles, and a
# "small" text label drawn in the colour of the 'small' line.
ggplot() +
geom_step(data=df, aes(x=date, y=small), color='#117DCF', size=0.75) +
geom_step(data=df, aes(x=date, y=large), color='#FF7605', size=0.75) +
scale_y_continuous(labels = scales::percent, expand = expand_scale(), limits = c(0,0.125)) +
labs(x=NULL, y=NULL) +
geom_text(aes(x = as.Date('1996-01-07'), y = 0.0275, label = 'small'), color = '#117DCF', size=5)
Any documentation beyond https://plotnine.readthedocs.io/en/stable/index.html? I have read the geom_text there and still can't produce what I need...
That geom_text layer has no dataframe, so the name small inside aes() has nothing to be looked up in. If you want to print the literal text, put it in quotes, i.e. '"small"', or move the label outside aes(); but it makes more sense to use annotate.
(ggplot(df)
...
# + geom_text(aes(x=pd.Timestamp('2000-01-01'), y = 0.0275, label = '"small"'))
# + geom_text(aes(x=pd.Timestamp('2000-01-01'), y = 0.0275), label = 'small')
+ annotate('text', x=pd.Timestamp('2000-01-01'), y = 0.0275, label='small')
)
I use geopandas to visualize the data on a map after merging the geopandas data frame with the numeric data frame. I want to show this data when the user hovers over the country it belongs to. Right now I manage to show the coordinates as an annotation where the mouse is, but I need the data instead.
def generate_Map():
    """Draw a world choropleth of post counts per location and save it as a PNG.

    Counts the values of records['Location'], joins them by country name onto
    the 'naturalearth_lowres' dataset, colours each country by its count, and
    adds a text box listing the first ten rows of the counts.

    NOTE(review): the header and the return line of this snippet were fused
    with "enter code here" placeholder text; the name is taken from the
    `AM.generate_Map()` call shown in the same post - confirm.
    `records`, `human_format` and `mapCursor` are defined elsewhere.

    Returns:
        tuple: (Map_Figure, basePlot) - the figure and the object returned
        by World.plot.
    """
    Map_Figure, Map_Graph = plt.subplots()
    dataset = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
    world = pd.DataFrame(dataset[['name', 'geometry', 'iso_a3']])
    data2 = (records['Location'].value_counts()).reset_index()
    data2.columns = ['name', 'Number']
    data2['Number'] = data2['Number'].apply(int)
    # Empty annotation created on the axes; it is not used again in this function.
    annot = Map_Graph.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
                               bbox=dict(boxstyle="round", fc="w"),
                               arrowprops=dict(arrowstyle="->"))
    # Left join on country name; countries without a matching location keep
    # a missing 'Number'.
    datum = world.set_index('name').join(data2.set_index('name'))
    World = geopandas.GeoDataFrame(datum)
    #print(Map_Graph)
    basePlot = World.plot(ax=Map_Graph, column='Number', linewidth=0.1, edgecolor='black', cmap='nipy_spectral',
                          vmin=World['Number'].min(), vmax=World['Number'].max(), legend=True)
    # worldPlot= World.plot(ax=Map_Graph, color= 'white', edgecolor='black', cmap='tab10', scheme='QUANTILES', figsize=(8, 4))
    locations = data2.values
    box = []
    for word, count in locations[:10]:
        string = '%s | %s posts' % (word, human_format(count))
        # Break long entries after 27 characters so the text box stays narrow.
        if len(string) >= 27:
            string = string[:27] + " \n" + string[27:]
        box.append(string)
    s = '\n'.join(box)
    mapText = '%s ' % (s)
    Map_Graph.text(-340, -80, mapText, size='small', wrap=True,
                   bbox={'boxstyle': 'round', 'facecolor': 'white', 'alpha': 0.5, 'pad': 0.8})
    Map_Graph.set_axis_off()
    Map_Figure.set_tight_layout(True)
    Map_Figure.savefig("Report/data/Map.png")
    mapCursor(Map_Graph)
    # Resized after saving, so this affects the returned figure, not the PNG.
    Map_Figure.set_size_inches(6, 3.3)
    return Map_Figure, basePlot
and the following code to render the data where the mouse hovers:
enter code here
self.mapFigure,ax= AM.generate_Map()
#hover = HoverTool(tooltips=[('Country/region', '#country'), ('% obesity', '#per_cent_obesity')])
self.mapGraphs = FigureCanvasWxAgg(self.gPanel_map, -1, self.mapFigure)
datacursor(ax, xytext=(15, -15), bbox=dict(fc='white'), arrowprops=None, hover=True,
formatter='{x.d}: \n{y}'.format)
As far as I'm aware, I've copied the documentation exactly. I basically used the documentation code and tweaked it for my purposes. But when I run this bit of code, no hover feature with text appears on my plot.
#Initialize df
aviation_data = pd.DataFrame(columns=["Latitude","Longitude","Fatalities"])
aviation_data["Latitude"] = [40.53666,60.94444]
aviation_data["Longitude"] = [-81.955833,-159.620834]
aviation_data["Fatalities"] = [True,False]

#Initialize colorscale
scl = [[0,"rgb(216,15,15)"],[1,"rgb(5,10,172)"]]

#Initialize text data
# One hover string per row. The builtin bool replaces the np.bool alias,
# which current NumPy releases no longer provide, and "Longitude: " now
# carries the same ": " separator as the other two labels.
text_df = (
    "Fatal: " + aviation_data["Fatalities"].apply(lambda x: str(bool(x))) + '<br>'
    + "Latitude: " + aviation_data["Latitude"].apply(str) + '<br>'
    + "Longitude: " + aviation_data["Longitude"].apply(str)
)

#Initialize data
data = [ dict(
    type = 'scattergeo',
    locationmode = 'USA-states',
    lon = aviation_data["Longitude"],
    lat = aviation_data["Latitude"],
    text = text_df,
    mode = 'markers',
    marker = dict(
        size = 5,
        opacity = 0.5,
        reversescale=True,
        autocolorscale=False,
        symbol = 'circle',
        line = dict(
            width=1,
            # Three components, so this is an rgb() colour, not rgba().
            color='rgb(102, 102, 102)'
        ),
        colorscale = scl,
        cmin = 0,
        # 0 / 1 from the boolean column, spanning the colorscale's two ends.
        color = aviation_data["Fatalities"].astype(int),
        cmax = 1
    ))]

#Initialize layout
layout = dict(
    title = ('Aviation Incidents for the Years 2014-2016<br>'
             '(red indicates fatal incident, blue indicates non-fatal)'),
    geo = dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showland = True,
        landcolor = "rgb(206, 206, 206)",
        countrywidth = 0.5,
        subunitwidth = 0.5
    ),
)

#Plot
fig = dict(data=data,layout=layout)
iplot(fig,validate=False)
Anyone know why my hover text isn't showing up?
In the last line of code you need to call this:
plotly.offline.plot(fig, validate=False)
Instead of:
iplot(fig, validate=False)
Also, do not forget to import plotly:
import plotly
Hope this will help