Python Matplotlib: Splitting one Large Graph into several Sub-Graphs (Subplot) - python

Simply put, suppose I have 2 lists:
A -> Has the list of names ['A','B','C','D','E','F','G','H']
B -> Has the list of values [5,7,3,8,2,9,1,3]
A will be the names of the X-Axis labels and the corresponding values in B will be the height of the graph ( i.e. the Y-Axis ).
%matplotlib inline
import pandas as pd
from matplotlib import rcParams
import matplotlib.pyplot as plt
from operator import itemgetter
rcParams.update({'figure.autolayout': True})
plt.figure(figsize=(14,9), dpi=600)
reso_names = ['A','B','C','D','E','F','G','H']
reso_values = [5,7,3,8,2,9,1,3]
plt.bar(range(len(reso_values)), reso_values, align='center')
plt.xticks(range(len(reso_names)), list(reso_names), rotation='vertical')
plt.margins(0.075)
plt.xlabel('X-Axis')
plt.ylabel('Y-Axis')
plt.title('Graph', {'family' : 'Arial Black',
'weight' : 'bold',
'size' : 22})
plt.show()
This code gives the following output :
However I want it such that it makes subgraphs for every 2 values. In this case there should be 4 subgraphs:
1st Graph has 'A' and 'B'
2nd Graph has 'C' and 'D'
3rd Graph has 'E' and 'F'
4th Graph has 'G' and 'H'
This splitting should be done dynamically (not 4 different loops, it should break the graph into units of 2 each depending on the size of the input, if list A has 10 values then it should give 5 subgraphs).
I figured out how to split the graph into two with half each but I need to achieve it using steps of N per graph (N in this example being 2).
The code I have for breaking the graph into 2 equal subgraphs is :
%matplotlib inline
import pandas as pd
from matplotlib import rcParams
import matplotlib.pyplot as plt
from operator import itemgetter
rcParams.update({'figure.autolayout': True})
plt.figure(figsize=(14,9), dpi=600)
reso_names = ['A','B','C','D','E','F','G','H']
reso_values = [5,7,3,8,2,9,1,3]
fig, axs = plt.subplots(nrows=2, sharey=True, figsize=(14,18), dpi=50)
size = int(len(reso_values))
half = int( size/2 )
fig.suptitle('Graph',
**{'family': 'Arial Black', 'size': 22, 'weight': 'bold'})
for ax, start, end in zip(axs, (0, half), (half, size)):
names, values = list(reso_names[start:end]), reso_values[start:end]
ax.bar(range(len(values)), values, align='center')
ax.set_xlabel('X-Axis')
ax.set_ylabel('Y-Axis')
ax.set_xticks(range(len(names)))
ax.set_xticklabels(names, rotation='vertical')
ax.set_xlim(0, len(names))
fig.subplots_adjust(bottom=0.05, top=0.95)
plt.show()
Which gives me :
I just want the program to dynamically split the graphs into subgraphs based on the splitting number N.

You can split the names and values lists (each with `size` elements) directly into ceil(size / N) sublists of at most N elements with this code:
# Number of bars shown in each sub-graph.
N = 3
# Cut each list into consecutive slices that start at 0, N, 2N, ...
sublists_names = [reso_names[start:start + N] for start in range(0, len(reso_names), N)]
sublists_values = [reso_values[start:start + N] for start in range(0, len(reso_values), N)]
Note that the last sublist will have fewer elements if N does not divide size.
Then you just perform a zip and plot each sublist in a different graph :
import pandas as pd
from matplotlib import rcParams
import matplotlib.pyplot as plt
from operator import itemgetter

rcParams.update({'figure.autolayout': True})

reso_names = ['A','B','C','D','E','F','G','H']
reso_values = [5,7,3,8,2,9,1,3]
N = 3

# Cut both lists into consecutive chunks of (at most) N elements.
sublists_names = [reso_names[x:x+N] for x in range(0, len(reso_names), N)]
sublists_values = [reso_values[x:x+N] for x in range(0, len(reso_values), N)]

# One subplot row per chunk. Using the number of chunks (instead of
# size//N + 1) avoids an empty extra subplot when N divides the list length.
# squeeze=False keeps axs a 2-D array even when there is only one row.
fig, axs = plt.subplots(nrows=len(sublists_values), sharey=True, squeeze=False,
                        figsize=(14,18), dpi=50)
fig.suptitle('Graph',
             **{'family': 'Arial Black', 'size': 22, 'weight': 'bold'})

for ax, names, values in zip(axs.ravel(), sublists_names, sublists_values):
    ax.bar(range(len(values)), values, align='center')
    ax.set_xlabel('X-Axis')
    ax.set_ylabel('Y-Axis')
    ax.set_xticks(range(len(names)))
    ax.set_xticklabels(names, rotation='vertical')
    ax.set_xlim(0, len(names))
    #ax.set_xlim(0, N)

fig.subplots_adjust(bottom=0.05, top=0.95)
plt.show()
If the length of the lists is not divisible by N, you can uncomment the last commented line (ax.set_xlim(0, N)) so that the bars stay aligned on the last subplot:

Related

How to place arrows between multiple scatter points

The code below produce this graph. I wonder if there is a way to make the lines between value1 and value2 into arrows, pointing in the direction of 1 to 2, from blue to green (In this case none of blues is lower than the greens).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Create a dataframe: 20 groups labelled 'A'..'T'; value2 is value1 plus a
# non-negative random offset below 0.25
value1 = np.random.uniform(size=20)
value2 = value1 + np.random.uniform(size=20) / 4
group_labels = [chr(code) for code in range(65, 85)]
df = pd.DataFrame({'group': group_labels, 'value1': value1, 'value2': value2})

# Reorder it following the values of the first value:
ordered_df = df.sort_values(by='value1')
my_range = range(1, len(df) + 1)
starts = ordered_df['value1']
ends = ordered_df['value2']

# One grey horizontal segment per group, from value1 to value2
plt.hlines(y=my_range, xmin=starts, xmax=ends, color='grey', alpha=0.4)
plt.scatter(starts, my_range, color='skyblue', alpha=1, label='value1')
plt.scatter(ends, my_range, color='green', alpha=0.4, label='value2')
plt.legend()

# Add title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("Comparison of the value 1 and the value 2", loc='left')
plt.xlabel('Value of the variables')
plt.ylabel('Group')

# Show the graph
plt.show()
The best option for multiple arrows is matplotlib.pyplot.quiver, because it accepts an array or dataframe of locations, unlike matplotlib.pyplot.arrow, which only accepts a single value.
Since the y-axis labels are defined by 'group', which are letters, use V = np.zeros(len(ordered_df)) or V = ordered_df.index - ordered_df.index for the .quiver direction vector.
Plot the dataframe directly with pandas.DataFrame.plot and kind='scatter'.
Tested in python 3.8.12, pandas 1.3.3, matplotlib 3.4.3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Create a dataframe (seeded so the figure is reproducible)
np.random.seed(354)
value1 = np.random.uniform(size=20)
value2 = value1 + np.random.uniform(size=20) / 4
group_labels = list(map(chr, range(65, 85)))
df = pd.DataFrame({'group': group_labels, 'value1': value1, 'value2': value2})

# Reorder it following the values of the first value and reset the index so
# the index values correspond to the y-axis tick locations
ordered_df = df.sort_values(by='value1').reset_index(drop=True)

# plot the dataframe: both scatter layers share the same Axes
ax = ordered_df.plot(kind='scatter', x='value1', y='group', color='skyblue',
                     alpha=1, figsize=(8, 6), label='value1')
ordered_df.plot(kind='scatter', x='value2', y='group', color='green', alpha=1,
                ax=ax, label='value2', xlabel='Value of the variables', ylabel='Group')

# plot the arrows: horizontal component is value2 - value1, vertical is 0
U = ordered_df.value2 - ordered_df.value1
V = np.zeros(len(ordered_df))  # the Y direction vector is 0 for each
ax.quiver(ordered_df.value1, ordered_df.group, U, V,
          width=0.003, color='gray', scale_units='x', scale=1)

# Add title with position
ax.set_title("Comparison of the value 1 and the value 2", loc='left')

# Show the graph
plt.show()
You can use plt.arrow instead of plt.hlines, but you have to loop over the rows:
# The head length is subtracted from dx so that the arrow, head included,
# ends at value2 (plt.arrow draws the head beyond dx by default).
arrow_head_length = 0.02
# Rows are drawn at y = 1, 2, ... in the order of ordered_df.
for y, (_, row) in enumerate(ordered_df.iterrows(), start=1):
    plt.arrow(x=row['value1'], y=y,
              dx=row['value2'] - row['value1'] - arrow_head_length, dy=0,
              head_width=0.5, head_length=arrow_head_length, fc='k', ec='k',
              color='grey', alpha=0.4)
example:

How do I change the order of pie chart slices in Python

I want to change the order of slices in the pie-chart. Currently, the slices are arranged in descending order. I want the slices of the chart to be in the following order:
Yes > Sometimes > Most of the times > no
I am using the following code:
# One colour per pie slice, applied in the order the slices are drawn.
colors = ['#99f3bd', '#fbaccc', '#a8df65', '#ff7b54']

fig, ax = plt.subplots(figsize=(5, 15))
ax.set_title('Treatment Group', fontsize=25, fontname="Times New Roman Bold")

# Share of each answer in column 'q6_t'.
answer_shares = df['q6_t'].value_counts(normalize=True)
ax = answer_shares.plot.pie(autopct='%1.0f%%', colors=colors)
ax.set_ylabel("")

plt.savefig('q6_t.png', bbox_inches='tight', transparent=True)
I am surprised that I have not found a duplicate for this presumably common question. If you want a specific order in the pie plot, you have to sort the pandas series generated by your value counts:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#corresponding color-label pairs
colors = ['#99f3bd', '#fbaccc', '#a8df65', '#ff7b54']
labels = ["Yes", "Sometimes", "Most of the times", "No"]

#test data generation
n = 10
np.random.seed(1234)
random_column = np.random.random(n)
random_answers = np.random.choice(labels, n)
df = pd.DataFrame({"A": random_column, "q6_t": random_answers})
#print(df)

fig, ax = plt.subplots(figsize=(8, 8))
ax.set_title('Treatment Group', fontsize=25, fontname="Times New Roman Bold")

#reindex(labels) sorts the index of the value counts according to the list labels
answer_shares = df['q6_t'].value_counts(normalize=True)
ordered_shares = answer_shares.reindex(labels)
ax = ordered_shares.plot.pie(autopct='%1.0f%%', colors=colors)
ax.set_ylabel("")
plt.show()
Sample output:

Python Bubble Chart Legends - TypeError

Here's my code:
# Question code, kept exactly as posted: it reproduces the TypeError discussed
# below. Loop bodies appear without indentation in this listing.
import pandas
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
%matplotlib inline
pandas.set_option ('max_columns',10)
df= pandas.read_csv('C:/Users/HP/Desktop/Coding/Python/2.Python Data Analysis/Module 2- Python Data Visualization/M2-Bubble Chart with Labels and Legend data.csv')
# Bubble chart: x = GDP per capita, y = life span, marker size = population / 1000,
# marker colour taken from the 'Bubble color' column.
plt.scatter(x=df['GDP per Capita'],y=df['Life Span'],s=df['Population']/1000, alpha=0.5, c=df['Bubble color'])
# alpha=0.5 makes the bubbles half transparent
# chart title, axis labels
plt.title('GDP,Lifespan,Population (Bubble size)')
plt.xlabel(' GDP')
plt.ylabel('Lifespan')
# bubble labels
x,y= df['GDP per Capita'],df['Life Span']
for i, txt in enumerate (df['Territory']):
plt.annotate(txt,(x[i],y[i]))
print(i,txt,x[i],y[i],df['Population'][i],df['Bubble color'][i])
# annotate places the territory name txt at the point (x[i], y[i]);
# the print shows the row index together with the values used for that bubble
# legend: build one coloured patch per territory
territory_list=list(df['Territory'])
bubble_color_list = list(df['Bubble color'])
l = []
for i in range (0,len(df.index)):
l.append(mpatches.Patch(color=bubble_color_list[i],
alpha=0.5,
label=territory_list[i]))
# BUG (the subject of this question): handles is given the integer 1, while
# the list of patches built above is named l (lowercase L).
plt.legend(handles=1,loc=(2,0))
# the index i in the loop above is used the same way as in the annotation loop
I am trying to generate a bubble chart with a legend, but the legend is not shown as it is supposed to be; only the chart appears, followed by this message:
`TypeError: 'int' object is not iterable`
What am I doing wrong?
You cannot have handles=1 in plt.legend(handles=1,loc=(2,0)).
handles must be a container, such as a list, tuple, etc...
Not only that, but a container of integers is unacceptable. Do not write handles=[1, 2, 3]
The following code shows how to properly call the legend method:
import numpy as np
import matplotlib.pyplot as plt

# Make some fake data.
a = b = np.arange(0, 3, .02)
c = np.exp(a)
d = c[::-1]

fig, ax = plt.subplots()
# ax.plot returns a list of Line2D objects; the trailing comma unpacks the
# single line so that legend() receives artists rather than lists.
line1, = ax.plot(a, c, 'k--')
line2, = ax.plot(a, d, 'k:')
line3, = ax.plot(a, c + d, 'k')
# NOTE(review): loc=(2,0) is copied from the question; in axes coordinates it
# appears to place the legend well to the right of the axes - confirm intent.
ax.legend((line1, line2, line3), ('line 1', 'line 2', 'line 3'), loc=(2,0))
plt.show()

Pandas: Splitting a Graph into many sub graphs while maintaining scale

I have a bar graph of 150 values.The code is :
rcParams.update({'figure.autolayout': True})
plt.figure(figsize=(14,9), dpi=600)

# Each entry of resolution3 is indexed as (name, value).
reso_names = [entry[0] for entry in resolution3]
reso_values = [entry[1] for entry in resolution3]

# Only the first 20 categories are drawn in this figure.
shown_values = reso_values[0:20]
shown_names = reso_names[0:20]
plt.bar(range(len(shown_values)), shown_values, align='center')
plt.xticks(range(len(shown_names)), list(shown_names), rotation='vertical')
plt.margins(0.075)
plt.xlabel('Resolution Category Tier 3')
plt.ylabel('Volume')

title_font = {'family' : 'Arial Black',
              'weight' : 'bold',
              'size' : 22}
plt.title('Resolution Category Tier 3 Volume', title_font)
plt.savefig('Reso3.pdf', format='pdf')
plt.show()
Since I want to break it down into sub-graphs of 20 each to maintain readability, I'm using the [0:20] slice on reso_names and reso_values (both lists).
However the problem is that scale cannot be maintained, each sub-graphs have very different scales and that is a problem in terms of consistency not being maintained. How can I set a scale that can be maintained across all the graphs.
You can specify sharey=True to keep the y-scale same in all subplots.
import numpy as np
import matplotlib.pyplot as plt

# Two random integer series with different ranges (1-9 and 1-99).
x = np.random.randint(1, 10, 10)
y = np.random.randint(1, 100, 10)

# sharey=True makes both side-by-side panels use the same y-axis.
fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)
left_ax, right_ax = axes
# do simple plot here, replace barplot yourself
left_ax.plot(x)
right_ax.plot(y)
Or if you prefer to plot them separately, you can manually configure ax.set_ylim().

Multiple figures in a single window

I want to create a function which plot on screen a set of figures in a single window. By now I write this code:
import pylab as pl


def plot_figures(figures):
    """Plot a dictionary of figures, each in its own window.

    Parameters
    ----------
    figures : <title, figure> dictionary
    """
    for title, image in figures.items():
        # A new figure per image: this is why every image opens separately.
        pl.figure()
        pl.imshow(image)
        pl.gray()
        pl.title(title)
        pl.axis('off')
It works perfectly but I would like to have the option for plotting all the figures in single window. And this code doesn't. I read something about subplot but it looks quite tricky.
You can define a function based on the subplots command (note the s at the end, different from the subplot command pointed by urinieto) of matplotlib.pyplot.
Below is an example of such a function, based on yours, allowing to plot multiples axes in a figure. You can define the number of rows and columns you want in the figure layout.
def plot_figures(figures, nrows = 1, ncols=1):
    """Plot a dictionary of figures as subplots of a single figure.

    Parameters
    ----------
    figures : <title, figure> dictionary
    ncols : number of columns of subplots wanted in the display
    nrows : number of rows of subplots wanted in the figure

    Raises
    ------
    IndexError
        If there are more figures than nrows * ncols subplots.
    """
    # squeeze=False always returns a 2-D array of Axes, so ravel() also works
    # for the default 1x1 grid (where plt.subplots would otherwise return a
    # single Axes object).
    fig, axeslist = plt.subplots(ncols=ncols, nrows=nrows, squeeze=False)
    flat_axes = axeslist.ravel()
    for ind, title in enumerate(figures):
        ax = flat_axes[ind]
        # Pass the colormap explicitly instead of relying on plt.gray(),
        # which returns None and changes the global default colormap.
        ax.imshow(figures[title], cmap='gray')
        ax.set_title(title)
        ax.set_axis_off()
    plt.tight_layout() # optional
Basically, the function creates a number of axes in the figure, according to the number of rows (nrows) and columns (ncols) you want, and then iterates over the axes to plot your images, adding the title for each of them.
Note that if you only have one image in your dictionary, your previous syntax plot_figures(figures) will work since nrows and ncols are set to 1 by default.
An example of what you can obtain:
import matplotlib.pyplot as plt
import numpy as np

# generation of a dictionary of (title, images): 'im0' .. 'im5', each a
# 100x100 array of normally distributed noise
number_of_im = 6
figures = {}
for i in range(number_of_im):
    figures['im' + str(i)] = np.random.randn(100, 100)

# plot of the images in a figure, with 2 rows and 3 columns
plot_figures(figures, 2, 3)
You should use subplot.
In your case, it would be something like this (if you want them one on top of the other):
fig = pl.figure(1)
# Stack the images vertically: one subplot row per entry, numbered from 1.
for k, title in enumerate(figures, start=1):
    ax = fig.add_subplot(len(figures), 1, k)
    # Axes has no gray() method; select the grayscale colormap on imshow.
    ax.imshow(figures[title], cmap='gray')
    # ax.title is a Text object, not a function; set_title sets the title.
    ax.set_title(title)
    ax.axis('off')
Check out the documentation for other options.
If you want to group multiple figures in one window, you can do something like this:
import matplotlib.pyplot as plt
import numpy as np

img = plt.imread('C:/.../Download.jpg') # Path to image
img = img[0:150,50:200,0] # Crop rows 0-149, columns 50-199 of the first channel
fig = plt.figure()
nrows = 10 # Number of times the image is repeated vertically
ncols = 10 # Number of times the image is repeated horizontally
# np.tile repeats the cropped image on an nrows x ncols grid and takes the
# tile height/width from img itself, so the two sizes cannot get mixed up.
pixels = np.tile(img, (nrows, ncols))
plt.imshow(pixels,cmap='jet')
plt.axis('off')
plt.show()
As a result you get:
Building on the answer from: How to display multiple images in one figure correctly?, here is another method:
import math
import numpy as np
import matplotlib.pyplot as plt


def plot_images(np_images, titles=(), columns=5, figure_size=(24, 18)):
    """Show a batch of images on a grid with `columns` images per row.

    Parameters
    ----------
    np_images : array whose first axis indexes the images
        (np_images.shape[0] images are drawn).
    titles : optional sequence; titles[i] is shown above image i.
        Images without a matching entry are drawn without a title.
    columns : number of subplot columns.
    figure_size : (width, height) passed to plt.figure.
    """
    count = np_images.shape[0]
    # Enough rows to hold every image at `columns` images per row.
    rows = math.ceil(count / columns)
    fig = plt.figure(figsize=figure_size)
    for index in range(count):
        ax = fig.add_subplot(rows, columns, index + 1)
        if index < len(titles):
            ax.set_title(str(titles[index]))
        # Draw on the subplot just created rather than the implicit current axes.
        ax.imshow(np_images[index])
    plt.show()
You can also do this:
import matplotlib.pyplot as plt

# One row with one column per image. squeeze=False keeps axarr a 2-D array,
# so this also works when imgs holds a single image.
f, axarr = plt.subplots(1, len(imgs), squeeze=False)
for ax, img in zip(axarr[0], imgs):
    ax.imshow(img)
plt.suptitle("Your title!")
plt.show()
def plot_figures(figures, nrows=None, ncols=None):
    """Plot a dictionary of images as subplots of one figure.

    Parameters
    ----------
    figures : <title, image> dictionary
    nrows, ncols : grid size. If either is missing (None or 0), all figures
        are placed in a single row.

    Raises
    ------
    ValueError
        If both nrows and ncols are given and nrows * ncols is smaller than
        the number of figures.
    """
    if not nrows or not ncols:
        # Plot figures in a single row if grid not specified
        nrows = 1
        ncols = len(figures)
    else:
        # check minimum grid configured
        if len(figures) > nrows * ncols:
            raise ValueError(f"Too few subplots ({nrows*ncols}) specified for ({len(figures)}) figures.")

    fig = plt.figure()
    # optional spacing between figures
    fig.subplots_adjust(hspace=0.4, wspace=0.4)
    for index, title in enumerate(figures):
        # Subplot numbers are 1-based.
        plt.subplot(nrows, ncols, index + 1)
        plt.title(title)
        plt.imshow(figures[title])
    plt.show()
Any grid configuration (or none) can be specified as long as the product of the number of rows and the number of columns is equal to or greater than the number of figures.
For example, for len(figures) == 10, these are acceptable
plot_figures(figures)  # no grid given: a single row with len(figures) columns
plot_figures(figures, 2, 5)  # 2 x 5 = 10 cells
plot_figures(figures, 3, 4)  # 3 x 4 = 12 cells
plot_figures(figures, 4, 3)  # 4 x 3 = 12 cells
plot_figures(figures, 5, 2)  # 5 x 2 = 10 cells
import numpy as np
def save_image(data, ws=0.1, hs=0.1, sn='save_name'):
    """Draw every image of a stack on one square grid and save it as '<sn>.png'.

    Parameters
    ----------
    data : array indexed as data[i, 0, :, :], e.g. shape (36, 1, 32, 32);
        data.shape[0] images are drawn.
    ws, hs : wspace / hspace values passed to plt.subplots_adjust.
    sn : output file name without the '.png' extension.
    """
    import matplotlib.pyplot as plt

    count = data.shape[0]
    # Smallest square grid that holds every image; rounding the square root
    # down would overflow the grid whenever count is not a perfect square.
    m = n = int(np.ceil(np.sqrt(count)))
    # figsize is (width, height); squeeze=False keeps a 2-D Axes array even
    # for a 1x1 grid so that ravel() always works.
    fig, ax = plt.subplots(m, n, figsize=(n*6, m*6), squeeze=False)
    ax = ax.ravel()
    for i in range(count):
        ax[i].matshow(data[i, 0, :, :])
        ax[i].set_xticks([])
        ax[i].set_yticks([])
    # Hide the grid cells that have no image.
    for unused in ax[count:]:
        unused.axis('off')
    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9,
                        top=0.9, wspace=ws, hspace=hs)
    # NOTE(review): tight_layout recomputes the subplot spacing and may
    # override the ws/hs values set just above - confirm which one is wanted.
    plt.tight_layout()
    plt.savefig('{}.png'.format(sn))
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
# Load the image stack from 'img_test.npy' and save it as 'multiple_plot.png'.
data = np.load('img_test.npy')
save_image(data, ws=0.1, hs=0.1, sn='multiple_plot')

Categories