Python Bubble Chart Legands- TypeError - python

Here's my code:
import pandas
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
%matplotlib inline
pandas.set_option ('max_columns',10)
df= pandas.read_csv('C:/Users/HP/Desktop/Coding/Python/2.Python Data Analysis/Module 2- Python Data Visualization/M2-Bubble Chart with Labels and Legend data.csv')
plt.scatter(x=df['GDP per Capita'],y=df['Life Span'],s=df['Population']/1000, alpha=0.5, c=df['Bubble color'])
#alpha at 0.5 to set the transparency
#chart title, axis labels
plt.title('GDP,Lifespan,Population (Bubble size)')
plt.xlabel(' GDP')
plt.ylabel('Lifespan')
#bubble labels
x,y= df['GDP per Capita'],df['Life Span']
for i, txt in enumerate (df['Territory']):
plt.annotate(txt,(x[i],y[i]))
print(i,txt,x[i],y[i],df['Population'][i],df['Bubble color'][i])
#annotate: is used to assign the population with the chart sp have from text, the x and y will be the next one
#it will print out the index number with the one that assigned with it as well
#legend
territory_list=list(df['Territory'])
bubble_color_list = list(df['Bubble color'])
l = []
for i in range (0,len(df.index)):
l.append(mpatches.Patch(color=bubble_color_list[i],
alpha=0.5,
label=territory_list[i]))
plt.legend(handles=1,loc=(2,0))
#the i is in the For loop is just basically like the one above to have all the information
I am looking to generate a Bubble chart with a legend for it but somehow it does not show the legend like it suppose to, just only the chart, and then show this message.
`TypeError: 'int' object is not iterable`
What am I doing wrong?

You cannot have handles=1 in plt.legend(handles=1,loc=(2,0)).
handles must be a container, such as a list, tuple, etc...
Not only that, but a container of integers in unacceptable. Do not write handles=[1, 2, 3]
The following code shows how to properly call the legend method:
import numpy as np
import matplotlib.pyplot as plt
# Make some fake data.
a = b = np.arange(0, 3, .02)
c = np.exp(a)
d = c[::-1]
fig, ax = plt.subplots()
line1 = ax.plot(a, c, 'k--')
line2 = ax.plot(a, d, 'k:')
line3 = ax.plot(a, c + d, 'k')
ax.legend((line1, line2, line3), ('line 1', 'line 2', 'line 3'), loc=(2,0))
plt.show()

Related

Seaborn boxplot : set median color and set tick label colors to boxes color

I'm using this nice boxplot graph, answer from #Parfait.
I got an out of bound error on j and had to use range(i*5,i*5+5). Why?
I'd like to set the median to a particular color, let's say red. medianprops=dict(color="red") won't work. How to do it?
How to set the y-axis tick labels to the same color as the boxes?
Disclaimer: I don't know what I'm doing.
Here's the code using random data :
# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import string
import matplotlib.colors as mc
import colorsys
# data
df = pd.DataFrame(np.random.normal(np.random.randint(5,15),np.random.randint(1,5),size=(100, 16)), columns=list(string.ascii_uppercase)[:16])
# Boxplot
fig, ax = plt.subplots(figsize=(9, 10))
medianprops=dict(color="red")
ax = sns.boxplot(data=df, orient="h", showfliers=False, palette = "husl")
ax = sns.stripplot(data=df, orient="h", jitter=True, size=7, alpha=0.5, palette = "husl") # show data points
ax.set_title("Title")
plt.xlabel("X label")
def lighten_color(color, amount=0.5):
# --------------------- SOURCE: #IanHincks ---------------------
try:
c = mc.cnames[color]
except:
c = color
c = colorsys.rgb_to_hls(*mc.to_rgb(c))
return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
for i,artist in enumerate(ax.artists):
# Set the linecolor on the artist to the facecolor, and set the facecolor to None
col = lighten_color(artist.get_facecolor(), 1.2)
artist.set_edgecolor(col)
# Each box has 6 associated Line2D objects (to make the whiskers, fliers, etc.)
# Loop over them here, and use the same colour as above
for j in range(i*5,i*5+5):
line = ax.lines[j]
line.set_color(col)
line.set_mfc(col)
line.set_mec(col)
#line.set_linewidth(0.5)
To change the color of the median, you can use the medianprops in sns.boxplot(..., medianprops=...). If you also set a unique label, that label can be tested again when iterating through the lines.
To know how many lines belong to each boxplot, you can divide the number of lines by the number of artists (just after the boxplot has been created, before other elements have been added to the plot). Note that a line potentially has 3 colors: the line color, the marker face color and the marker edge color. Matplotlib creates the fliers as an invisible line with markers. The code below thus also changes these colors to make it more robust to different options and possible future changes.
Looping simultaneously through the boxes and the y tick labels allows copying the color. Making them a bit larger and darker helps for readability.
import matplotlib.pyplot as plt
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb, to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
def enlighten(color, factor=0.5):
h, s, v = rgb_to_hsv(to_rgb(color))
return hsv_to_rgb((h, s, 1 - factor * (1 - v)))
def endarken(color, factor=0.5):
h, s, v = rgb_to_hsv(to_rgb(color))
return hsv_to_rgb((h, s, factor * v))
df = pd.DataFrame(np.random.normal(1, 5, size=(100, 16)).cumsum(axis=0),
columns=['Hydrogen', 'Helium', 'Lithium', 'Beryllium', 'Boron', 'Carbon', 'Nitrogen', 'Oxygen',
'Fluorine', 'Neon', 'Sodium', 'Magnesium', 'Aluminum', 'Silicon', 'Phosphorus', 'Sulfur'])
sns.set_style('white')
fig, ax = plt.subplots(figsize=(9, 10))
colors = sns.color_palette("husl", len(df.columns))
sns.boxplot(data=df, orient="h", showfliers=False, palette='husl',
medianprops=dict(color="yellow", label='median'), ax=ax)
lines_per_boxplot = len(ax.lines) // len(ax.artists)
for i, (box, ytick) in enumerate(zip(ax.artists, ax.get_yticklabels())):
ytick.set_color(endarken(box.get_facecolor()))
ytick.set_fontsize(20)
color = enlighten(box.get_facecolor())
box.set_color(color)
for lin in ax.lines[i * lines_per_boxplot: (i + 1) * lines_per_boxplot]:
if lin.get_label() != 'median':
lin.set_color(color)
lin.set_markerfacecolor(color)
lin.set_markeredgecolor(color)
sns.stripplot(data=df, orient="h", jitter=True, size=7, alpha=0.5, palette='husl', ax=ax)
sns.despine(ax=ax)
ax.set_title("Title")
ax.set_xlabel("X label")
plt.tight_layout()
plt.show()
I just answer point 2. of my question.
After tinkering, I found this to work :
# Each box has 5 associated Line2D objects (the whiskers and median)
# Loop over them here, and use the same colour as above
n=5 # this was for tinkering
for j in range(i*n,i*n+n):
if j != i*n+4 : line = ax.lines[j] # not the median
line.set_color(col)
Again, I don't know what I'm doing. So someone more knowledgeable may provide a more valuable answer.
I removed the stripplot for better clarity.

How to place arrows between multiple scatter points

The code below produce this graph. I wonder if there is a way to make the lines between value1 and value2 into arrows, pointing in the direction of 1 to 2, from blue to green (In this case none of blues is lower than the greens).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Create a dataframe
value1=np.random.uniform(size=20)
value2=value1+np.random.uniform(size=20)/4
df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'value1':value1 , 'value2':value2 })
# Reorder it following the values of the first value:
ordered_df = df.sort_values(by='value1')
my_range=range(1,len(df.index)+1)
# The horizontal plot is made using the hline function
plt.hlines(y=my_range, xmin=ordered_df['value1'], xmax=ordered_df['value2'], color='grey', alpha=0.4)
plt.scatter(ordered_df['value1'], my_range, color='skyblue', alpha=1, label='value1')
plt.scatter(ordered_df['value2'], my_range, color='green', alpha=0.4 , label='value2')
plt.legend()
# Add title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("Comparison of the value 1 and the value 2", loc='left')
plt.xlabel('Value of the variables')
plt.ylabel('Group')
# Show the graph
plt.show()
The best option for multiple arrows is matplotlib.pyplot.quiver, because it accepts an array or dataframe of locations, unlike matplotlib.pyplot.arrow, which only accepts a single value.
Since the y-axis labels are defined by 'group', which are letters, use V = np.zeros(len(ordered_df)) or V = ordered_df.index - ordered_df.index for the .quiver direction vector.
Plot the dataframe directly with pandas.DataFrame.plot and kind='scatter'.
Tested in python 3.8.12, pandas 1.3.3, matplotlib 3.4.3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Create a dataframe
np.random.seed(354)
value1=np.random.uniform(size=20)
value2=value1+np.random.uniform(size=20)/4
df = pd.DataFrame({'group':list(map(chr, range(65, 85))), 'value1':value1 , 'value2':value2 })
# Reorder it following the values of the first value and reset the index so the index values correspond to the y-axis tick locations
ordered_df = df.sort_values(by='value1').reset_index(drop=True)
# plot the dataframe
ax = ordered_df.plot(kind='scatter', x='value1', y='group', color='skyblue', alpha=1, figsize=(8, 6), label='value1')
ordered_df.plot(kind='scatter', x='value2', y='group', color='green', alpha=1, ax=ax, label='value2', xlabel='Value of the variables', ylabel='Group')
# plot the arrows
V = ordered_df.index - ordered_df.index # the Y direction vector is 0 for each
ax.quiver(ordered_df.value1, ordered_df.group, (ordered_df.value2-ordered_df.value1), V, width=0.003, color='gray', scale_units='x', scale=1)
# Add title with position
ax.set_title("Comparison of the value 1 and the value 2", loc='left')
# Show the graph
plt.show()
You can use plt.arrow instead of plt.hlines, but you have to loop over the rows:
for y, (_, row) in enumerate(ordered_df.iterrows()):
arrow_head_length = 0.02
plt.arrow(x=row['value1'], y=y+1, dx=row['value2']-row['value1']-arrow_head_length, dy=0,
head_width=0.5, head_length=arrow_head_length, fc='k', ec='k',
color='grey', alpha=0.4)
example:

Matplotlib : How to populate the below chart having all the x-axis labels and grid lines accordingly?

data = {'tenor': ['1w','1m','3m','6m','12m','1y','2y','3y','4y','5y','6y','7y','10y','15y','20y','25y','30y','40y','50y'],'rate_s': [0.02514, 0.026285, 0.0273, 0.0279, 0.029616, 0.026526, 0.026028, 0.024, 0.025958,0.0261375, 0.026355, 0.026, 0.026898, 0.0271745, 0.02741, 0.027, 0.0275, 0.0289,0.0284],'rate_t':[ 0.02314, 0.024285, 0.0253,0.0279, 0.028616, 0.026526,0.027028, 0.024, 0.025958,0.0271375, 0.02355, 0.026, 0.024898, 0.0271745, 0.02641,0.027, 0.0255, 0.0289,0.0284]}
I want to produce the chart in blue with the same format like below. I tried this piece of code but results are not satisfactory (chart in white). It also not showing all x-axis labels. Please suggest.
ax = plt.gca()
df.plot(kind='line',x='tenor',y='rate_s',marker='o',color='green',ax=ax)
df.plot(kind='line',x='tenor',y='rate_y',marker='o', color='red', ax=ax)
ax.minorticks_on()
ax.grid(which='major',linestyle='-', linewidth='0.5', color='blue')
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
plt.show()
This is following the discussions in the comments.
There are a couple parts, the full example is at the bottom.
Style
One of your questions was how to change the style of the plot. This can be done with the following code:
import matplotlib.pyplot as plt
plt.style.use('seaborn-darkgrid')
there are many possible styles, and you can even create your own style if you wish. To see all possible styles see: the documentation. To list all styles use plt.style.available
Custom Ticker
For the custom tickers: you can use FixedLocator or if you know it is log or symlog, then matplotlib has a built-in locator. See the matplotlib doc for scales
You can use FixedLocator to set up the axis, to be separated. i.e. the following code will give you what you want.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
X = np.arange(0, 2000)
Y = np.arange(0, 2000)
def convert(date):
if 'w' in date:
return 7*int(date[:-1])
if 'm' in date:
return 30*int(date[:-1])
if 'y' in date:
return 30*int(date[:-1]) + 360
ticks = [convertdate(d) for d in tenor]
plt.style.use('seaborn-darkgrid')
ax = plt.axes()
t = ticker.FixedLocator(locs=ticks)
ax.xaxis.set_ticklabels(tenor)
ax.xaxis.set_major_locator(t)
# ax.xaxis.set_minor_locator(ticker.MultipleLocator(3))
plt.plot(X, Y, c = 'k')
plt.show()
Which produces:
Specific Case
For your specific case, you probably want the custom tickers to be on a specific interval (i.e. smallest of rate_t, biggest of rate_t).
Thus you would need to change the convert function to be as following:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
x = data['rate_t']
y = data['rate_s']
def get_indices(date):
if 'w' in date:
return 7*int(date[:-1])
if 'm' in date:
return 30*int(date[:-1])
if 'y' in date:
return 30*int(date[:-1]) + 360
def convert(indices):
x = np.linspace(min(data['rate_t']), max(data['rate_t']), indices[-1] + 1)
return x[indices]
indices = [get_indices(d) for d in tenor]
ticks = convert(indices)
plt.style.use('seaborn-darkgrid')
ax = plt.axes()
t = ticker.FixedLocator(locs=ticks)
ax.xaxis.set_ticklabels(tenor)
ax.xaxis.set_major_locator(t)
# ax.xaxis.set_minor_locator(ticker.MultipleLocator(3))
plt.plot(x, y, c = 'k')
plt.show()
(assuming the data['rate_s'] and data['rate_t'] are as is and without processing)
Which would produce this:
Let me know if you have any questions.

(python matplotlib ) How to change color of each lollipop in a lollipop plot (ax.stem)

I am using ax.stem to draw lollipop plot in python. However, I found it difficult to assign different colors to each lollipop
as shown here
As you can see I have 2 categories "GWP" & "FDP".
In my project, each category should be divided into 4 subcategories "ingredient", "Waste", "energy" and "infrastructure". Therefore, I want to assign them different colors to indicate the subcategory.
There is a solution proposed here: https://python-graph-gallery.com/181-custom-lollipop-plot/
But this only teaches you how to change color for all lollipops.
And there is another solution: https://python-graph-gallery.com/183-highlight-a-group-in-lollipop/
But this one doesn't really use ax.stem.
Please let me know how to assign different colors to each lollipop.
(Also, I don't know somehow why my plot is displayed upside down. Also, the y axis does not align in order, and there is one dot not connected by a line. It displays correctly in my original plot though.)
Here is my code:
#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# my dataset
columns = np.array(['types', 'GWP100 (year)', 'FDP (year)'])
types = np.array(['Total (ingredient) per kg', 'Total (waste) per kg',
'energy (whole process) per kg', 'Infrastructure', 'Total (Total)']).reshape(5,1)
gwp = np.array([ 2.86982617e+02, 2.16824983e+02, 4.38920760e+01,
6.02400000e-02, 5.47759916e+02]).reshape(5,1)
fdp = np.array([ 1.35455867e+02, 7.02868322e+00, 1.26622560e+01,
1.64568000e-02, 1.55163263e+02]).reshape(5,1)
original_data = np.concatenate((types, gwp, fdp), axis = 1)
# produce dataframe
data = pd.DataFrame(original_data, columns = columns)
# types GWP100 (year) FDP (year)
#0 Total (ingredient) per kg 286.982617 135.455867
#1 Total (waste) per kg 216.824983 7.02868322
#2 energy (whole process) per kg 43.892076 12.662256
#3 Infrastructure 0.06024 0.0164568
#4 Total (Total) 547.759916 155.163263
#%% graph
fig = plt.figure(1, figsize =(8,6))
# 1st subplot
ax1 = fig.add_subplot(1,2,1)
gwp = data[data.columns[1]]
ax1.stem(gwp)
ax1.set_ylabel(r'kg CO$_2$-Eq', fontsize=10)
ax1.set_xlabel('GWP', fontsize=10)
# 2nd subplot
ax2 = fig.add_subplot(1,2,2)
fdp = data[data.columns[2]]
ax2.stem(fdp)
ax2.set_ylabel(r'kg oil-Eq', fontsize = 10)
ax2.set_xlabel('FDP', fontsize=10)
The stem currently consists of a couple of lines and a "line" consisting of dots on top. It has no option to colorize the lines separately within its interface.
You may replicate the stem plot to draw the lines manually with the color you like.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
columns = np.array(['types', 'GWP100 (year)', 'FDP (year)'])
types = np.array(['Total (ingredient) per kg', 'Total (waste) per kg',
'energy (whole process) per kg', 'Infrastructure', 'Total (Total)'])
gwp = np.array([ 2.86982617e+02, 2.16824983e+02, 4.38920760e+01,
6.02400000e-02, 5.47759916e+02])
fdp = np.array([ 1.35455867e+02, 7.02868322e+00, 1.26622560e+01,
1.64568000e-02, 1.55163263e+02])
# produce dataframe
data = pd.DataFrame([types,gwp,fdp], index = columns).transpose()
colors = list("bgryk")
fig, (ax, ax2) = plt.subplots(ncols=2)
for t, y, c in zip(data["types"], data["GWP100 (year)"],colors):
ax.plot([t,t], [0,y], color=c, marker="o", markevery=(1,2))
ax.set_ylim(0,None)
plt.setp(ax.get_xticklabels(), rotation=90)
fig.tight_layout()
plt.show()
A more efficient solution is of course to use a LineCollection in combination with a scatter plot for the dots.
fig, (ax, ax2) = plt.subplots(ncols=2)
segs = np.zeros((len(data), 2, 2))
segs[:,:,0] = np.repeat(np.arange(len(data)),2).reshape(len(data),2)
segs[:,1,1] = data["GWP100 (year)"].values
lc = LineCollection(segs, colors=colors)
ax.add_collection(lc)
ax.scatter(np.arange(len(data)), data["GWP100 (year)"].values, c=colors)
ax.set_xticks(np.arange(len(data)))
ax.set_xticklabels(data["types"], rotation=90)
ax.autoscale()
ax.set_ylim(0,None)
fig.tight_layout()
plt.show()
I will answer one of your main questions regarding the same coloring of the lines and markers category wise. There seems to be no direct option while calling ax1.stem() to specify the list of colors as per the official docs. In fact they say that the resulting plot might not be reasonable if one do so. Nevertheless, below is one trick to get things done your way.
The idea is following:
Get the objects (stemline) displayed on the subplot
Get the x-y data of the markers
Loop over the data and change the color of each stemline. Plot the marker individually with the same color as stemline. The colors is an array specifying the colors of your choice.
Following is the relevant part of the code:
# 1st subplot
ax1 = fig.add_subplot(1,2,1)
gwp = data[data.columns[1]]
colors = ['r', 'g', 'b', 'y', 'k']
_, stemlines, _ = ax1.stem(gwp)
line = ax1.get_lines()
xd = line[0].get_xdata()
yd = line[0].get_ydata()
# mec and mfc stands for markeredgecolor and markerfacecolor
for i in range(len(stemlines)):
plt.plot([xd[i]], [yd[i]], 'o', ms=7, mfc=colors[i], mec=colors[i])
plt.setp(stemlines[i], 'color', colors[i])
ax1.set_ylabel(r'kg CO$_2$-Eq', fontsize=10)
ax1.set_xlabel('GWP', fontsize=10)
# 2nd subplot
ax2 = fig.add_subplot(1,2,2)
fdp = data[data.columns[2]]
_, stemlines, _ = ax2.stem(fdp)
line = ax2.get_lines()
xd = line[0].get_xdata()
yd = line[0].get_ydata()
for i in range(len(stemlines)):
plt.plot([xd[i]], [yd[i]], 'o', ms=7, mfc=colors[i], mec=colors[i])
plt.setp(stemlines[i], 'color', colors[i])

How to plot data from multiple two column text files with legends in Matplotlib?

How do I open multiple text files from different directories and plot them on a single graph with legends?
This is relatively simple if you use pylab (included with matplotlib) instead of matplotlib directly. Start off with a list of filenames and legend names, like [ ('name of file 1', 'label 1'), ('name of file 2', 'label 2'), ...]. Then you can use something like the following:
import pylab
datalist = [ ( pylab.loadtxt(filename), label ) for filename, label in list_of_files ]
for data, label in datalist:
pylab.plot( data[:,0], data[:,1], label=label )
pylab.legend()
pylab.title("Title of Plot")
pylab.xlabel("X Axis Label")
pylab.ylabel("Y Axis Label")
You also might want to add something like fmt='o' to the plot command, in order to change from a line to points. By default, matplotlib with pylab plots onto the same figure without clearing it, so you can just run the plot command multiple times.
Assume your file looks like this and is named test.txt (space delimited):
1 2
3 4
5 6
7 8
Then:
#!/usr/bin/python
import numpy as np
import matplotlib.pyplot as plt
with open("test.txt") as f:
data = f.read()
data = data.split('\n')
x = [row.split(' ')[0] for row in data]
y = [row.split(' ')[1] for row in data]
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.set_title("Plot title...")
ax1.set_xlabel('your x label..')
ax1.set_ylabel('your y label...')
ax1.plot(x,y, c='r', label='the data')
leg = ax1.legend()
plt.show()
I find that browsing the gallery of plots on the matplotlib site helpful for figuring out legends and axes labels.
I feel the simplest way would be
from matplotlib import pyplot;
from pylab import genfromtxt;
mat0 = genfromtxt("data0.txt");
mat1 = genfromtxt("data1.txt");
pyplot.plot(mat0[:,0], mat0[:,1], label = "data0");
pyplot.plot(mat1[:,0], mat1[:,1], label = "data1");
pyplot.legend();
pyplot.show();
label is the string that is displayed on the legend
you can plot as many series of data points as possible before show() to plot all of them on the same graph
This is the simple way to plot simple graphs. For other options in genfromtxt go to this url.

Categories