Plotting a subplot- Python - python

I have been trying to achieve something like this:
Example
So far this is what I have tried:
crimes.Month = pd.to_datetime(crimes.Month, format='%Y/%m')
crimes.index = pd.DatetimeIndex(crimes.Month)
import numpy as np
crimes_count_date = crimes.pivot_table('Month', aggfunc=np.size,columns='Crime type', index=crimes.index.date, fill_value=0)
crimes_count_date.index = pd.DatetimeIndex(crimes_count_date.index)
plo = crimes_count_date.rolling(365).sum().plot(figsize=(12, 30),subplots=True, layout=(-1, 3), sharex=False, sharey=False)
Note- on the x-axis I would like to display each year/month: 2017/08
Current output below, is not showing all months/year or any lines for the crime types
Current Ouput

Not sure how your data looks.
But python has a nice way of doing subplots:
import matplotlib.pyplot as plt
plt.figure(figsize=(16,8)) ## setting over-all figure size (optional)
plt.subplot(2, 3, 1)
## this creates 6 subplots (2 rows and 3 columns)
## 1 at the end means we are in the first subplot.. then...
plt.plot(x1,y1) ## for well-selected x1 and y1
plt.subplot(232) ## the same as subplot(2, 3, 2) - you can use this when values have
## one digit only; now we are in the 2nd subplot
plt.plot(x2, y2) ## this will be plotted in the second subplot
## etc. ...
plt.subplot(236)
plt.plot(x6,y6)

Related

Pointplot and Scatterplot in one figure but X axis is shifting

Hi I'm trying to plot a pointplot and scatterplot on one graph with the same dataset so I can see the individual points that make up the pointplot.
Here is the code I am using:
xlPath = r'path to data here'
df = pd.concat(pd.read_excel(xlPath, sheet_name=None),ignore_index=True)
sns.pointplot(data=df, x='ID', y='HM (N/mm2)', palette='bright', capsize=0.15, alpha=0.5, ci=95, join=True, hue='Layer')
sns.scatterplot(data=df, x='ID', y='HM (N/mm2)')
plt.show()
When I plot, for some reason the points from the scatterplot are offsetting one ID spot right on the x-axis. When I plot the scatter or the point plot separately, they each are in the correct ID spot. Why would plotting them on the same plot cause the scatterplot to offset one right?
Edit: Tried to make the ID column categorical, but that didn't work either.
Seaborn's pointplot creates a categorical x-axis while here the scatterplot uses a numerical x-axis.
Explicitly making the x-values categorical: df['ID'] = pd.Categorical(df['ID']), isn't sufficient, as the scatterplot still sees numbers. Changing the values to strings does the trick. To get them in the correct order, sorting might be necessary.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# first create some test data
df = pd.DataFrame({'ID': np.random.choice(np.arange(1, 49), 500),
'HM (N/mm2)': np.random.uniform(1, 10, 500)})
df['Layer'] = ((df['ID'] - 1) // 6) % 4 + 1
df['HM (N/mm2)'] += df['Layer'] * 8
df['Layer'] = df['Layer'].map(lambda s: f'Layer {s}')
# sort the values and convert the 'ID's to strings
df = df.sort_values('ID')
df['ID'] = df['ID'].astype(str)
fig, ax = plt.subplots(figsize=(12, 4))
sns.pointplot(data=df, x='ID', y='HM (N/mm2)', palette='bright',
capsize=0.15, alpha=0.5, ci=95, join=True, hue='Layer', ax=ax)
sns.scatterplot(data=df, x='ID', y='HM (N/mm2)', color='purple', ax=ax)
ax.margins(x=0.02)
plt.tight_layout()
plt.show()

Adding quantitative values to differentiate data through colours in a scatterplot's legend in Python?

Currently, I'm working on an introductory paper on data manipulation and such; however... the CSV I'm working on has some things I wish to do a scatter graph on!
I want a scatter graph to show me the volume sold on certain items as well as their average price, differentiating all data according to their region (Through colours I assume).
So what I want is to know if I can add the region column as a quantitative value
or if there's a way to make this possible...
It's my first time using Python and I'm confused way too often
I'm not sure if this is what you mean, but here is some working code, assuming you have data in the format of [(country, volume, price), ...]. If not, you can change the inputs to the scatter method as needed.
import random
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
n_countries = 50
# get the data into "countries", for example
countries = ...
# in this example: countries is [('BS', 21, 25), ('WZ', 98, 25), ...]
df = pd.DataFrame(countries)
# arbitrary method to get a color
def get_color(i, max_i):
cmap = matplotlib.cm.get_cmap('Spectral')
return cmap(i/max_i)
# get the figure and axis - make a larger figure to fit more points
# add labels for metric names
def get_fig_ax():
fig = plt.figure(figsize=(14,14))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('volume')
ax.set_ylabel('price')
return fig, ax
# switch around the assignments depending on your data
def get_x_y_labels():
x = df[1]
y = df[2]
labels = df[0]
return x, y, labels
offset = 1 # offset just so annotations aren't on top of points
x, y, labels = get_x_y_labels()
fig, ax = get_fig_ax()
# add a point and annotation for each of the labels/regions
for i, region in enumerate(labels):
ax.annotate(region, (x[i] + offset, y[i] + offset))
# note that you must use "label" for "legend" to work
ax.scatter(x[i], y[i], color=get_color(i, len(x)), label=region)
# Add the legend just outside of the plot.
# The .1, 0 at the end will put it outside
ax.legend(loc='upper right', bbox_to_anchor=(1, 1, .1, 0))
plt.show()

Plot subplots using seaborn pairplot

If I draw the plot using the following code, it works and I can see all the subplots in a single row. I can specifically break the number of cols into three or two and show them. But I have 30 columns and I wanted to use a loop mechanism so that they are plotted in a grid of say 4x4 sub-plots
regressionCols = ['col_a', 'col_b', 'col_c', 'col_d', 'col_e']
sns.pairplot(numerical_df, x_vars=regressionCols, y_vars='price',height=4, aspect=1, kind='scatter')
plt.show()
The code using loop is below. However, I don't see anything rendered.
nr_rows = 4
nr_cols = 4
li_cat_cols = list(regressionCols)
fig, axs = plt.subplots(nr_rows, nr_cols, figsize=(nr_cols*4,nr_rows*4), squeeze=False)
for r in range(0, nr_rows):
for c in range(0,nr_cols):
i = r*nr_cols+c
if i < len(li_cat_cols):
sns.set(style="darkgrid")
bp=sns.pairplot(numerical_df, x_vars=li_cat_cols[i], y_vars='price',height=4, aspect=1, kind='scatter')
bp.set(xlabel=li_cat_cols[i], ylabel='Price')
plt.tight_layout()
plt.show()
Not sure what I am missing.
I think you didnt connect each of your subplot spaces in a matrix plot to scatter plots generated in a loop.
Maybe this solution with inner pandas plots could be proper for you:
For example,
1.Lets simply define an empty pandas dataframe.
numerical_df = pd.DataFrame([])
2. Create some random features and price depending on them:
numerical_df['A'] = np.random.randn(100)
numerical_df['B'] = np.random.randn(100)*10
numerical_df['C'] = np.random.randn(100)*-10
numerical_df['D'] = np.random.randn(100)*2
numerical_df['E'] = 20*(np.random.randn(100)**2)
numerical_df['F'] = np.random.randn(100)
numerical_df['price'] = 2*numerical_df['A'] +0.5*numerical_df['B'] - 9*numerical_df['C'] + numerical_df['E'] + numerical_df['D']
3. Define number of rows and columns. Create a subplots space with nr_rows and nr_cols.
nr_rows = 2
nr_cols = 4
fig, axes = plt.subplots(nrows=nr_rows, ncols=nr_cols, figsize=(15, 8))
for idx, feature in enumerate(numerical_df.columns[:-1]):
numerical_df.plot(feature, "price", subplots=True,kind="scatter",ax=axes[idx // 4,idx % 4])
4. Enumerate each feature in dataframe and plot a scatterplot with price:
for idx, feature in enumerate(numerical_df.columns[:-1]):
numerical_df.plot(feature, "price", subplots=True,kind="scatter",ax=axes[idx // 4,idx % 4])
where axes[idx // 4, idx % 4] defines the location of each scatterplot in a matrix you create in (3.)
So, we got a matrix plot:
Scatterplot matrix

How to create bar chart with secondary_y from dataframe

I want to create a bar chart of two series (say 'A' and 'B') contained in a Pandas dataframe. If I wanted to just plot them using a different y-axis, I can use secondary_y:
df = pd.DataFrame(np.random.uniform(size=10).reshape(5,2),columns=['A','B'])
df['A'] = df['A'] * 100
df.plot(secondary_y=['A'])
but if I want to create bar graphs, the equivalent command is ignored (it doesn't put different scales on the y-axis), so the bars from 'A' are so big that the bars from 'B' are cannot be distinguished:
df.plot(kind='bar',secondary_y=['A'])
How can I do this in pandas directly? or how would you create such graph?
I'm using pandas 0.10.1 and matplotlib version 1.2.1.
Don't think pandas graphing supports this. Did some manual matplotlib code.. you can tweak it further
import pylab as pl
fig = pl.figure()
ax1 = pl.subplot(111,ylabel='A')
#ax2 = gcf().add_axes(ax1.get_position(), sharex=ax1, frameon=False, ylabel='axes2')
ax2 =ax1.twinx()
ax2.set_ylabel('B')
ax1.bar(df.index,df.A.values, width =0.4, color ='g', align = 'center')
ax2.bar(df.index,df.B.values, width = 0.4, color='r', align = 'edge')
ax1.legend(['A'], loc = 'upper left')
ax2.legend(['B'], loc = 'upper right')
fig.show()
I am sure there are ways to force the one bar further tweak it. move bars further apart, one slightly transparent etc.
Ok, I had the same problem recently and even if it's an old question, I think that I can give an answer for this problem, in case if someone else lost his mind with this. Joop gave the bases of the thing to do, and it's easy when you only have (for exemple) two columns in your dataframe, but it becomes really nasty when you have a different numbers of columns for the two axis, due to the fact that you need to play with the position argument of the pandas plot() function. In my exemple I use seaborn but it's optionnal :
import pandas as pd
import seaborn as sns
import pylab as plt
import numpy as np
df1 = pd.DataFrame(np.array([[i*99 for i in range(11)]]).transpose(), columns = ["100"], index = [i for i in range(11)])
df2 = pd.DataFrame(np.array([[i for i in range(11)], [i*2 for i in range(11)]]).transpose(), columns = ["1", "2"], index = [i for i in range(11)])
fig, ax = plt.subplots()
ax2 = ax.twinx()
# we must define the length of each column.
df1_len = len(df1.columns.values)
df2_len = len(df2.columns.values)
column_width = 0.8 / (df1_len + df2_len)
# we calculate the position of each column in the plot. This value is based on the position definition :
# Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center)
# http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.plot.html
df1_posi = 0.5 + (df2_len/float(df1_len)) * 0.5
df2_posi = 0.5 - (df1_len/float(df2_len)) * 0.5
# In order to have nice color, I use the default color palette of seaborn
df1.plot(kind='bar', ax=ax, width=column_width*df1_len, color=sns.color_palette()[:df1_len], position=df1_posi)
df2.plot(kind='bar', ax=ax2, width=column_width*df2_len, color=sns.color_palette()[df1_len:df1_len+df2_len], position=df2_posi)
ax.legend(loc="upper left")
# Pandas add line at x = 0 for each dataframe.
ax.lines[0].set_visible(False)
ax2.lines[0].set_visible(False)
# Specific to seaborn, we have to remove the background line
ax2.grid(b=False, axis='both')
# We need to add some space, the xlim don't manage the new positions
column_length = (ax2.get_xlim()[1] - abs(ax2.get_xlim()[0])) / float(len(df1.index))
ax2.set_xlim([ax2.get_xlim()[0] - column_length, ax2.get_xlim()[1] + column_length])
fig.patch.set_facecolor('white')
plt.show()
And the result : http://i.stack.imgur.com/LZjK8.png
I didn't test every possibilities but it looks like it works fine whatever the number of columns in each dataframe you use.

Multiple figures in a single window

I want to create a function which plot on screen a set of figures in a single window. By now I write this code:
import pylab as pl
def plot_figures(figures):
"""Plot a dictionary of figures.
Parameters
----------
figures : <title, figure> dictionary
"""
for title in figures:
pl.figure()
pl.imshow(figures[title])
pl.gray()
pl.title(title)
pl.axis('off')
It works perfectly but I would like to have the option for plotting all the figures in single window. And this code doesn't. I read something about subplot but it looks quite tricky.
You can define a function based on the subplots command (note the s at the end, different from the subplot command pointed by urinieto) of matplotlib.pyplot.
Below is an example of such a function, based on yours, allowing to plot multiples axes in a figure. You can define the number of rows and columns you want in the figure layout.
def plot_figures(figures, nrows = 1, ncols=1):
"""Plot a dictionary of figures.
Parameters
----------
figures : <title, figure> dictionary
ncols : number of columns of subplots wanted in the display
nrows : number of rows of subplots wanted in the figure
"""
fig, axeslist = plt.subplots(ncols=ncols, nrows=nrows)
for ind,title in enumerate(figures):
axeslist.ravel()[ind].imshow(figures[title], cmap=plt.gray())
axeslist.ravel()[ind].set_title(title)
axeslist.ravel()[ind].set_axis_off()
plt.tight_layout() # optional
Basically, the function creates a number of axes in the figures, according to the number of rows (nrows) and columns (ncols) you want, and then iterates over the list of axis to plot your images and adds the title for each of them.
Note that if you only have one image in your dictionary, your previous syntax plot_figures(figures) will work since nrows and ncols are set to 1 by default.
An example of what you can obtain:
import matplotlib.pyplot as plt
import numpy as np
# generation of a dictionary of (title, images)
number_of_im = 6
figures = {'im'+str(i): np.random.randn(100, 100) for i in range(number_of_im)}
# plot of the images in a figure, with 2 rows and 3 columns
plot_figures(figures, 2, 3)
You should use subplot.
In your case, it would be something like this (if you want them one on top of the other):
fig = pl.figure(1)
k = 1
for title in figures:
ax = fig.add_subplot(len(figures),1,k)
ax.imshow(figures[title])
ax.gray()
ax.title(title)
ax.axis('off')
k += 1
Check out the documentation for other options.
If you want to group multiple figures in one window you can do smth. like this:
import matplotlib.pyplot as plt
import numpy as np
img = plt.imread('C:/.../Download.jpg') # Path to image
img = img[0:150,50:200,0] # Define image size to be square --> Or what ever shape you want
fig = plt.figure()
nrows = 10 # Define number of columns
ncols = 10 # Define number of rows
image_heigt = 150 # Height of the image
image_width = 150 # Width of the image
pixels = np.zeros((nrows*image_heigt,ncols*image_width)) # Create
for a in range(nrows):
for b in range(ncols):
pixels[a*image_heigt:a*image_heigt+image_heigt,b*image_heigt:b*image_heigt+image_heigt] = img
plt.imshow(pixels,cmap='jet')
plt.axis('off')
plt.show()
As result you receive:
Building on the answer from: How to display multiple images in one figure correctly?, here is another method:
import math
import numpy as np
import matplotlib.pyplot as plt
def plot_images(np_images, titles = [], columns = 5, figure_size = (24, 18)):
count = np_images.shape[0]
rows = math.ceil(count / columns)
fig = plt.figure(figsize=figure_size)
subplots = []
for index in range(count):
subplots.append(fig.add_subplot(rows, columns, index + 1))
if len(titles):
subplots[-1].set_title(str(titles[index]))
plt.imshow(np_images[index])
plt.show()
You can also do this:
import matplotlib.pyplot as plt
f, axarr = plt.subplots(1, len(imgs))
for i, img in enumerate(imgs):
axarr[i].imshow(img)
plt.suptitle("Your title!")
plt.show()
def plot_figures(figures, nrows=None, ncols=None):
if not nrows or not ncols:
# Plot figures in a single row if grid not specified
nrows = 1
ncols = len(figures)
else:
# check minimum grid configured
if len(figures) > nrows * ncols:
raise ValueError(f"Too few subplots ({nrows*ncols}) specified for ({len(figures)}) figures.")
fig = plt.figure()
# optional spacing between figures
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for index, title in enumerate(figures):
plt.subplot(nrows, ncols, index + 1)
plt.title(title)
plt.imshow(figures[title])
plt.show()
Any grid configuration (or none) can be specified as long as the product of the number of rows and the number of columns is equal to or greater than the number of figures.
For example, for len(figures) == 10, these are acceptable
plot_figures(figures)
plot_figures(figures, 2, 5)
plot_figures(figures, 3, 4)
plot_figures(figures, 4, 3)
plot_figures(figures, 5, 2)
import numpy as np
def save_image(data, ws=0.1, hs=0.1, sn='save_name'):
import matplotlib.pyplot as plt
m = n = int(np.sqrt(data.shape[0])) # (36, 1, 32, 32)
fig, ax = plt.subplots(m,n, figsize=(m*6,n*6))
ax = ax.ravel()
for i in range(data.shape[0]):
ax[i].matshow(data[i,0,:,:])
ax[i].set_xticks([])
ax[i].set_yticks([])
plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9,
top=0.9, wspace=ws, hspace=hs)
plt.tight_layout()
plt.savefig('{}.png'.format(sn))
data = np.load('img_test.npy')
save_image(data, ws=0.1, hs=0.1, sn='multiple_plot')

Categories