I want to change the size of the of the intervals between numbers.
The x axis obviously goes from 10 to 26. But I want every whole number to be displayed: 10, 11, 12, 13 etc...
I also want bins to have a width of .5 so that I can have a bin from 10.5 to 11 or 24 to 24.5 etc...because otherwise, python outputs the histogram with the bins random and undetermined.
Here's what I have:
import random
import numpy
from matplotlib import pyplot
import numpy as np
data = np.genfromtxt('result.csv',delimiter=',',skip_header=1, dtype=float)
magg=[row[5] for row in data]
magr=[row[6] for row in data]
bins = numpy.linspace(10, 26)
pyplot.hist(magg, bins, alpha=0.5, color='g', label='mag of g')
pyplot.hist(magr, bins, alpha=0.5, color='r', label='mag of r')
pyplot.legend(loc='upper left')
pyplot.show()
Use an axes locator, in particular, MultipleLocator. Building of your example, it becomes this:
import matplotlib.pyplot as plt
import numpy as np
x = np.random.random_integers(low=10, high=27, size=37)
bins = np.linspace(10, 26)
fig, ax = plt.subplots()
hist = ax.hist(x, bins, alpha=0.5, color='g', label='mag of g')
ax.xaxis.set_major_locator(plt.MultipleLocator(1))
Related
I'm trying to get an "inverse" second x-axis to a x-axis in log-scale. secondary_xaxis works fine but I can't format the ticks as usually. Is this a bug or am I missing something?
Here's what I do:
import matplotlib.pyplot as plt
import numpy as np
fig, axs = plt.subplots(figsize=(0.6*4,0.6*3))
y = np.random.rand(10)
x = np.linspace(30,200,num=10)
p = plt.plot(x, y, marker='o', markersize=3)
plt.xscale("log")
def m2rho(m):
return 1 / m * 1000
def rho2m(rho):
return 1 / rho / 1000
secax = axs.secondary_xaxis('top', functions=(m2rho, rho2m))
from matplotlib.ticker import ScalarFormatter, NullFormatter
for axis in [axs.xaxis, secax.xaxis]:
axis.set_major_formatter(ScalarFormatter())
axis.set_minor_formatter(NullFormatter())
axs.set_xticks([50, 100, 200])
secax.set_xticks([20,10,5])
plt.tight_layout()
plt.show()
Resulting in:
So essentially the second axis on top should display the ticks 20, 10 and 5 in non-scientific numbers, but it doesn't.
Essentially, I'm trying to make a 4-D scatter plot with 4 columns of data (see sample below).
X (mm) Y (mm) Z (mm) Diameter (mm)
11.096 11.0972 13.2401 124.279
14.6836 11.0389 8.37134 138.949
19.9543 11.1025 31.1912 138.949
15.4079 10.9505 31.1639 152.21
20.6372 14.5175 6.94501 152.211
20.47 11.225 31.3612 152.211
19.0432 11.3234 8.93819 152.213
29.4091 10.1331 26.6354 186.417
12.9391 10.6616 28.9523 186.418
29.9102 10.4828 25.1129 186.418
30.5483 12.163 15.9116 186.418
19.0631 10.5784 30.9791 186.418
9.65332 10.8563 12.975 186.419
8.4003 11.0417 17.0181 186.419
26.0134 10.6857 9.41572 186.419
13.7451 11.1495 28.7108 186.419
The first three columns of data (X, Y, Z) are the coordinate positions of the 4th column of data (Diameter) so I was able to generate a 3-D scatter plot of these positions. However, I'm trying to plot these Diameters with different color markers based on certain threshold values (ie. Diameters that are less than 100 mm are red, 101-200 mm are blue, 201-300 mm are green, etc.) Once the color of the markers are determined, it would plot these markers based on its X, Y, Z coordinates. I tried writing a simple for loop to do this, but for some reason it becomes very slow/laggy and will only plot one color too. Can anyone see if there's something wrong with my approach? Thanks!
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import pandas
import os
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
os.chdir(r'C:\Users\Me\Documents')
data = pandas.read_excel("Diameter Report", "Data")
X = data['X (mm)'].values.tolist()
Y = data['Y (mm)'].values.tolist()
Z = data['Z (mm)'].values.tolist()
dims = data['Diameter (mm)'].values.tolist()
for i in dims:
if i < int(100):
ax.plot(X, Y, Z, c='r', marker='o')
elif i >= int(101) and i <200:
ax.plot(X, Y, Z, c='b', marker='o')
elif i >= int(201) and i <300:
ax.plot(X, Y, Z, c='g', marker='o')
ax.set_xlabel('Center X (mm)')
ax.set_ylabel('Center Y (mm)')
ax.set_zlabel('Center Z (mm)')
plt.show()
It seems the thresholds for the values are equally spaced, so you can just divide by 100 and truncate further decimal places. This allows to plot a single scatter instead of hundreds of plots.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import pandas
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
data = pandas.read_excel("Diameter Report", "Data")
X = data['X (mm)'].values
Y = data['Y (mm)'].values
Z = data['Z (mm)'].values
dims = data['Diameter (mm)'].values
ax.scatter(X,Y,Z, c=(dims/100).astype(int), marker="o", cmap="brg")
ax.set_xlabel('Center X (mm)')
ax.set_ylabel('Center Y (mm)')
ax.set_zlabel('Center Z (mm)')
plt.show()
The more general case of arbitrary boundaries would probably best be solved using a BoundaryNorm and a colormap with as many different colors as classifications.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
d = np.random.rand(10,4)
d[:,3] = np.random.randint(1,300, 10)
data = pd.DataFrame(d, columns=["X (mm)","Y (mm)","Z (mm)","Diameter (mm)"])
X = data['X (mm)'].values
Y = data['Y (mm)'].values
Z = data['Z (mm)'].values
dims = data['Diameter (mm)'].values
bounds = [0,100,200,300]
colors = ["b", "r", "g"]
cmap = mcolors.ListedColormap(colors)
norm = mcolors.BoundaryNorm(bounds, len(colors))
sc = ax.scatter(X,Y,Z, c=dims, marker="o", cmap=cmap, norm=norm)
ax.set_xlabel('Center X (mm)')
ax.set_ylabel('Center Y (mm)')
ax.set_zlabel('Center Z (mm)')
fig.colorbar(sc)
plt.show()
Here is a slightly more general solution where you can explicitly specify the ranges you want regardless of the spacing. I did not have the complete data so I modified your limits from 100, 200, 300 to 140, 180, 200 based on the provided data.
A couple of things:
You probably want to use scatter3d as you mentioned it in your question instead of plot.
I am using NumPy to read in the data because this way you will have the data as NumPy arrays which make the masking and slicing easy.
Here I am creating 3 conditional masks depending on the magnitude of dims.
Next, you store these masks in a list and then iterate over it to use one mask at a time.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import pandas
import numpy as np
import os
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
X, Y, Z, dims = np.loadtxt('sample.txt', unpack=True, skiprows=1)
mask1 = (dims<140)
mask2 = ((dims>=140) & (dims<180))
mask3 = ((dims>=180) & (dims<200))
masks = [mask1, mask2, mask3]
colors = ['r', 'b', 'g'] # color order as you specified in the question
for mask, color in zip(masks, colors):
ax.scatter3D(X[mask], Y[mask], Z[mask], c=color)
ax.set_xlabel('Center X (mm)')
ax.set_ylabel('Center Y (mm)')
ax.set_zlabel('Center Z (mm)')
plt.show()
I have two sets of different sizes that I'd like to plot on the same histogram. However, since one set has ~330,000 values and the other has about ~16,000 values, their frequency histograms are hard to compare. I'd like to plot a histogram comparing the two sets such that the y-axis is the % of occurrences in that bin. My code below gets close to this, except that rather than having the individual bin values sum to 1.0, the integral of the histogram sums to 1.0 (this is because of the normed=True parameter).
How can I achieve my goal? I've already tried manually calculating the % frequency and using plt.bar() but rather than overlaying the plots, the plots are compared side by side. I want to keep the effect of having the alpha=0.5
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
if plt.get_fignums():
plt.close('all')
electric = pd.read_csv('electric.tsv', sep='\t')
gas = pd.read_csv('gas.tsv', sep='\t')
electric_df = pd.DataFrame(electric)
gas_df = pd.DataFrame(ngma_nonheat)
electric = electric_df['avg_daily']*30
gas = gas_df['avg_daily']*30
## Create a plot for NGMA gas usage
plt.figure("Usage Comparison")
weights_electric = np.ones_like(electric)/float(len(electric))
weights_gas = np.ones_like(gas)/float(len(gas))
bins=np.linspace(0, 200, num=50)
n, bins, rectangles = plt.hist(electric, bins, alpha=0.5, label='electric usage', normed=True, weights=weights_electric)
plt.hist(gas, bins, alpha=0.5, label='gas usage', normed=True, weights=weights_gas)
plt.legend(loc='upper right')
plt.xlabel('Average 30 day use in therms')
plt.ylabel('% of customers')
plt.title('NGMA Customer Usage Comparison')
plt.show()
It sounds like you don't want the normed/density kwarg in this case. You're already using weights. If you multiply your weights by 100 and leave out the normed=True option, you should get exactly what you had in mind.
For example:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(1)
x = np.random.normal(5, 2, 10000)
y = np.random.normal(2, 1, 3000000)
xweights = 100 * np.ones_like(x) / x.size
yweights = 100 * np.ones_like(y) / y.size
fig, ax = plt.subplots()
ax.hist(x, weights=xweights, color='lightblue', alpha=0.5)
ax.hist(y, weights=yweights, color='salmon', alpha=0.5)
ax.set(title='Histogram Comparison', ylabel='% of Dataset in Bin')
ax.margins(0.05)
ax.set_ylim(bottom=0)
plt.show()
On the other hand, what you're currently doing (weights and normed) would result in (note the units on the y-axis):
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(1)
x = np.random.normal(5, 2, 10000)
y = np.random.normal(2, 1, 3000000)
xweights = 100 * np.ones_like(x) / x.size
yweights = 100 * np.ones_like(y) / y.size
fig, ax = plt.subplots()
ax.hist(x, weights=xweights, color='lightblue', alpha=0.5, normed=True)
ax.hist(y, weights=yweights, color='salmon', alpha=0.5, normed=True)
ax.set(title='Histogram Comparison', ylabel='% of Dataset in Bin')
ax.margins(0.05)
ax.set_ylim(bottom=0)
plt.show()
I am trying to make a histgram over a specific range but the matplotlib.pyplot.hist() function keeps cropping the range to the bins with entries in them. A toy example:
import numpy as np
import matplotlib.pyplot as plt
x = np.random.uniform(-100,100,1000)
nbins = 100
xmin = -500
xmax = 500
fig = plt.figure();
ax = fig.add_subplot(1, 1, 1)
ax.hist(x, bins=nbins,range=[xmin,xmax])
plt.show()
Gives a plot with a range [-100,100]. Why is the range not [-500,500] as specified?
(I am using the Enthought Canopy 1.4 and sorry but I do not have a high enough rep to post an image of the plot.)
Actually, it works if you specify with range an interval shorter than [-100, 100]. For example, this work :
import numpy as np
import matplotlib.pyplot as plt
x = np.random.uniform(-100, 100, 1000)
plt.hist(x, bins=30, range=(-50, 50))
plt.show()
If you want to plot the histogram on a range larger than [x.min(), x.max()] you can change xlim propertie of the plot.
import numpy as np
import matplotlib.pyplot as plt
x = np.random.uniform(-100, 100, 1000)
plt.hist(x, bins=30)
plt.xlim(-500, 500)
plt.show()
the following code is for making the same y axis limit on two subplots
f ,ax = plt.subplots(1,2,figsize = (30, 13),gridspec_kw={'width_ratios': [5, 1]})
df.plot(ax = ax[0], linewidth = 2.5)
ylim = [df['min_return'].min()*1.1,df['max_return'].max()*1.1]
ax[0].set_ylim(ylim)
ax[1].hist(data,normed =1, bins = num_bin, color = 'yellow' ,alpha = 1)
ax[1].set_ylim(ylim)
I would like to change the default x range for the histogram plot. The range of the data is from 7 to 12. However, by default the histogram starts right at 7 and ends at 13. I want it to start at 6.5 and end at 12.5. However, the ticks should go from 7 to 12.How do I do it?
import asciitable
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import pylab
from pylab import xticks
data = asciitable.read(file)
hmag = data['col8']
visits = data['col14']
origin = data['col13']
n, bins, patches = plt.hist(hmag, 30, facecolor='gray', align='mid')
xticks(range(7,13))
pylab.rc("axes", linewidth=8.0)
pylab.rc("lines", markeredgewidth=2.0)
plt.xlabel('H mag', fontsize=14)
plt.ylabel('# of targets', fontsize=14)
pylab.xticks(fontsize=15)
pylab.yticks(fontsize=15)
plt.grid(True)
plt.savefig('hmag_histogram.eps', facecolor='w', edgecolor='w', format='eps')
plt.show()
plt.hist(hmag, 30, range=[6.5, 12.5], facecolor='gray', align='mid')
import matplotlib.pyplot as plt
...
plt.xlim(xmin=6.5, xmax = 12.5)
the following code is for making the same y axis limit on two subplots
f ,ax = plt.subplots(1,2,figsize = (30, 13),gridspec_kw={'width_ratios': [5, 1]})
df.plot(ax = ax[0], linewidth = 2.5)
ylim = [lower_limit,upper_limit]
ax[0].set_ylim(ylim)
ax[1].hist(data,normed =1, bins = num_bin, color = 'yellow' ,alpha = 1)
ax[1].set_ylim(ylim)
just a reminder, plt.hist(range=[low, high]) the histogram auto crops the range if the specified range is larger than the max&min of the data points. So if you want to specify the y-axis range number, i prefer to use set_ylim