How to get (random) number from Gaussian Mixture model(GMM)? - python

I want to get a list of random numbers from GMM.
I've plotted a distribution like this.
And I was wondering whether there is some way to get the value from the distribution curve, such as getting number 0.94, 0.96, 0.95.... How to get it?
I am not sure whether the belowing is useful for u or not. I got the code from Minimal reproduction
from matplotlib import rc
from sklearn import mixture
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
import matplotlib.ticker as tkr
import scipy.stats as stats
# x = open("prueba.dat").read().splitlines()
# create the data
x = np.concatenate((np.random.normal(5, 5, 1000),np.random.normal(10, 2, 1000)))
f = np.ravel(x).astype(np.float)
f=f.reshape(-1,1)
g = mixture.GaussianMixture(n_components=3,covariance_type='full')
g.fit(f)
weights = g.weights_
means = g.means_
covars = g.covariances_
plt.hist(f, bins=100, histtype='bar', density=True, ec='red', alpha=0.5)
# f_axis = f.copy().ravel()
# f_axis.sort()
# plt.plot(f_axis,weights[0]*stats.norm.pdf(f_axis,means[0],np.sqrt(covars[0])).ravel(), c='red')
plt.rcParams['agg.path.chunksize'] = 10000
plt.grid()
plt.show()
Thanks!

According to the documentation you can simply do g.sample(n_samples=64) on the fitted model to generate 64 samples

Related

I am not able to display graph in matplotlib

I'm trying to print a logistic differential equation and I'm pretty sure the equation is written correctly but my graph doesn't display anything.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
def eq(con,x):
return con*x*(1-x)
xList = np.linspace(0,4, num=1000)
con = 2.6
x= .4
for num in range(len(xList)-1):
plt.plot(xList[num], eq(con,x))
x=eq(con,x)
plt.xlabel('Time')
plt.ylabel('Population')
plt.title("Logistic Differential Equation")
plt.show()
You get nothing in your plot because you are plotting points.
In plt you need to have x array and y array (that have the same length) in order to make a plot.
If you want to do exactly what you are doing I suggest to do like this:
import matplotlyb.pyplot as plt # just plt is sufficent
import numpy as np
def eq(con,x):
return con*x*(1-x)
xList = np.linspace(0,4, num=1000)
con = 2.6
x= .4
y = np.zeros(len(xList)) # initialize an array with the same lenght as xList
for num in range(len(xList)-1):
y[num] = eq(con,x)
x=eq(con,x)
plt.figure() # A good habit is always to use figures in plt
plt.plot(xList, y) # 2 arrays of the same lenght
plt.xlabel('Time')
plt.ylabel('Population')
plt.title("Logistic Differential Equation")
plt.show() # now you should get somthing here
I hope that this helps you ^^

How to draw the Probability Density Function (PDF) plot in Python?

I'd like to ask how to draw the Probability Density Function (PDF) plot in Python.
This is my codes.
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import scipy.stats as stats
.
x = np.random.normal(50, 3, 1000)
source = {"Genotype": ["CV1"]*1000, "AGW": x}
df=pd.DataFrame(source)
df
I generated a data frame. Then, I tried to draw a PDF graph.
df["AGW"].sort_values()
df_mean = np.mean(df["AGW"])
df_std = np.std(df["AGW"])
pdf = stats.norm.pdf(df["AGW"], df_mean, df_std)
plt.plot(df["AGW"], pdf)
I obtained above graph. What I did wrong? Could you let me how to draw the Probability Density Function (PDF) Plot which is also known as normal distribution graph.
Could you let me know which codes (or library) I need to use to draw the PDF graph?
Always many thanks!!
You just need to sort the values (not really check what's after edit)
pdf = stats.norm.pdf(df["AGW"].sort_values(), df_mean, df_std)
plt.plot(df["AGW"].sort_values(), pdf)
And it will work.
The line df["AGW"].sort_values() doesn't change df. Maybe you meant df.sort_values(by=['AGW'], inplace=True).
In that case the full code will be :
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import scipy.stats as stats
x = np.random.normal(50, 3, 1000)
source = {"Genotype": ["CV1"]*1000, "AGW": x}
df=pd.DataFrame(source)
df.sort_values(by=['AGW'], inplace=True)
df_mean = np.mean(df["AGW"])
df_std = np.std(df["AGW"])
pdf = stats.norm.pdf(df["AGW"], df_mean, df_std)
plt.plot(df["AGW"], pdf)
Which gives :
Edit :
I think here we already have the distribution (x is normally distributed) so we dont need to generate the pdf of x. As the use of the pdf is for something like this :
mu = 50
variance = 3
sigma = math.sqrt(variance)
x = np.linspace(mu - 5*sigma, mu + 5*sigma, 1000)
plt.plot(x, stats.norm.pdf(x, mu, sigma))
plt.show()
Here we dont need to generate the distribution from x points, we only need to plot the density of the distribution we already have .
So you might use this :
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
x = np.random.normal(50, 3, 1000) #Generating Data
source = {"Genotype": ["CV1"]*1000, "AGW": x}
df=pd.DataFrame(source) #Converting to pandas DataFrame
df.plot(kind = 'density'); # or df["AGW"].plot(kind = 'density');
Which gives :
You might use other packages if you want, like seaborn :
import seaborn as sns
plt.figure(figsize = (5,5))
sns.kdeplot(df["AGW"] , bw = 0.5 , fill = True)
plt.show()
Or this :
import seaborn as sns
sns.set_style("whitegrid") # Setting style(Optional)
plt.figure(figsize = (10,5)) #Specify the size of figure
sns.distplot(x = df["AGW"] , bins = 10 , kde = True , color = 'teal'
, kde_kws=dict(linewidth = 4 , color = 'black')) #kde for normal distribution
plt.show()
Check this article for more.

howto get fit parameters from seaborn distplot fit=?

I'm using seaborn distplot (data, fit=stats.gamma)
How do I get the fit parameters returned?
Here is an example:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
df = pd.read_csv ('RequestSize.csv')
import matplotlib.pyplot as plt
reqs = df['12 web pages']
reqs = reqs.dropna()
reqs = reqs[np.logical_and (reqs > np.percentile (reqs, 0), reqs < np.percentile (reqs, 95))]
dist = sns.distplot (reqs, fit=stats.gamma)
Use the object you passed to distplot:
stats.gamma.fit(reqs)
I confirm the above is true - the sns.distplot fit method is equivalent to the fit method in scipy.stats so you can get the parameters from there, e.g.:
from scipy import stats
ax = sns.distplot(e_t_hat, bins=20, kde=False, fit=stats.norm);
plt.title('Distribution of Cointegrating Spread for Brent and Gasoil')
# Get the fitted parameters used by sns
(mu, sigma) = stats.norm.fit(e_t_hat)
print "mu={0}, sigma={1}".format(mu, sigma)
# Legend and labels
plt.legend(["normal dist. fit ($\mu=${0:.2g}, $\sigma=${1:.2f})".format(mu, sigma)])
plt.ylabel('Frequency')
# Cross-check this is indeed the case - should be overlaid over black curve
x_dummy = np.linspace(stats.norm.ppf(0.01), stats.norm.ppf(0.99), 100)
ax.plot(x_dummy, stats.norm.pdf(x_dummy, mu, sigma))
plt.legend(["normal dist. fit ($\mu=${0:.2g}, $\sigma=${1:.2f})".format(mu, sigma),
"cross-check"])

How to make the histograms 'touch' in pylab?

So I made a program that does what I need, mainly plots histogram from my data, but I have a few issues with it:
Here's the program:
# -*- coding: cp1250 -*-
from __future__ import division
from numpy import *
from matplotlib import rc
from matplotlib.pyplot import *
import numpy as np
import matplotlib.pyplot as plt
data = loadtxt("mioni.txt", int)
nuz = len(data)
nsmp = 20
duz = int(nuz/nsmp)
L = []
for i1 in range(0,nsmp):
suma = 0
for i2 in range(0,duz):
suma += data[i1*duz+i2]
L.append(suma)
print L
plt.hist(L, 20, normed=1, facecolor='blue', alpha=0.75)
plt.xlabel('t(\mu s)')
plt.ylabel('Broj događaja')
plt.axis([0,10,0,300])
plt.grid(True)
plt.show()
EDIT: so I managed to deal with the ugly sums, but now my histograms don't work :(
Data is here: http://dropcanvas.com/kqjem
What's wrong? I get tons of errors and python crashes :\
The problem comes from having a discrete data set, it looks like you set the bins parameter to something that doesn't fit. Use the pylab.hist parameter histtype="stepfilled" to get them to touch without the lines. Here are a few examples:
import numpy as np
import pylab as plt
# Sample data
X1 = np.random.exponential(1.0,size=5000)
X2 = [int(z) for z in X1]
plt.subplot(221)
plt.hist(X1,bins=50)
plt.title('Continuous Data')
plt.subplot(222)
plt.hist(X2,bins=50)
plt.title('Discrete Data')
plt.subplot(223)
plt.hist(X2,histtype='stepfilled')
plt.title('Discrete Data Filled')
plt.show()
use numpy.histogram: http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
or matplotlib.pyplot.hist: http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist
for example:
plt.hist(data, bins=20)

Python-Matplotlib boxplot. How to show percentiles 0,10,25,50,75,90 and 100?

I would like to plot an EPSgram (see below) using Python and Matplotlib.
The boxplot function only plots quartiles (0, 25, 50, 75, 100). So, how can I add two more boxes?
I put together a sample, if you're still curious. It uses scipy.stats.scoreatpercentile, but you may be getting those numbers from elsewhere:
from random import random
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import scoreatpercentile
x = np.array([random() for x in xrange(100)])
# percentiles of interest
perc = [min(x), scoreatpercentile(x,10), scoreatpercentile(x,25),
scoreatpercentile(x,50), scoreatpercentile(x,75),
scoreatpercentile(x,90), max(x)]
midpoint = 0 # time-series time
fig = plt.figure()
ax = fig.add_subplot(111)
# min/max
ax.broken_barh([(midpoint-.01,.02)], (perc[0], perc[1]-perc[0]))
ax.broken_barh([(midpoint-.01,.02)], (perc[5], perc[6]-perc[5]))
# 10/90
ax.broken_barh([(midpoint-.1,.2)], (perc[1], perc[2]-perc[1]))
ax.broken_barh([(midpoint-.1,.2)], (perc[4], perc[5]-perc[4]))
# 25/75
ax.broken_barh([(midpoint-.4,.8)], (perc[2], perc[3]-perc[2]))
ax.broken_barh([(midpoint-.4,.8)], (perc[3], perc[4]-perc[3]))
ax.set_ylim(-0.5,1.5)
ax.set_xlim(-10,10)
ax.set_yticks([0,0.5,1])
ax.grid(True)
plt.show()

Categories