The Birthday paradox - how to plot - python

from __future__ import division, print_function
from numpy.random import randint
import random
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
def bday(c):
trials = 5000
count = 0
for trial in range(trials):
year = [0]*365
l = False
for i in range(c):
bdayp = randint(1,365)
year[bdayp] = year[bdayp] + 1
if year[bdayp] > 1:
l = True
if l == True:
count = count + 1
prob = count / trials
return prob
for i in range(2,41):
a = bday(i)
print(i,a)
As you can see, I generate the number of people in the class along with the probability that they share a birthday. How can I plot this so that I have n (number of people) on the x-axis and probability on the y-axis using matplotlib.pyplot?
Thanks.

I've linked in the comments the proper documentation to your problem. For the sake of you finding your own solution, perhaps looking at the following might make more sense of how to go about your problem:
def func(x):
return x * 10
x = []
y = []
for i in range(10):
x.append(i)
y.append(func(i))
plt.plot(x, y)
The above can also be achieved by doing the following:
def func(x):
return x * 10
x = np.arange(10)
plt.plot(x, func(x))
Here is the documentation for np.arange; both will plot the following:

Related

Why are all the values in my appended list the same?

When I plot y as a function of t, the values for a do not change. When I print the appended list I see they are all 0.0. Please help! I'm confused because y as a function of x plots fine. I can't include the actual code, but here is a minimum working example.
import numpy as np
from math import *
from astropy.table import Table
import matplotlib.pyplot as plt
from random import random
x = 0
y = 0
t = 0
h = 0.0100
tf = 40
N=ceil(tf/h)
tnew = t
x_list = [x]
y_list = [y]
t_list = [t]
for i in range(N):
#while y >= 0:
tnew = t + h*i
t = tnew
print(t)
#First and second derivatives
# stuff happens (can't share the code)
x_new = random()
y_new = random()
x = x_new
y = y_new
""" appends selected data for ability to plot"""
x_list.append(x)
y_list.append(y)
t_list.append(t)
#break
""" Plot1"""
plt.plot(t_list,y_list)
plt.show()
""" Plot2"""
plt.plot(x_list,y_list)
plt.show()
First plot I just get a vertical line
Second plot is the way it should be
First plot:
Second plot

Distribution plot with wrong total value

To create
I have made a distribution plot with code below:
from numpy import *
import numpy as np
import matplotlib.pyplot as plt
sigma = 4.1
x = np.linspace(-6*sigma, 6*sigma, 200)
def distr(n):
def g(x):
return (1/(sigma*sqrt(2*pi)))*exp(-0.5*(x/sigma)**2)
FxSum = 0
a = list()
for i in range(n):
# divide into 200 parts and sum one by one
numb = g(-6*sigma + (12*sigma*i)/n)
FxSum += numb
a.append(FxSum)
return a
plt.plot(x, distr(len(x)))
plt.show()
This is, of course, a way of getting the result without using hist(), cdf() or any other options from Python libraries.
Why the total sum is not 1? It shouldn't depend from (for example) sigma.
Almost right, but in order to integrate you have to multiply the function value g(x) times your tiny interval dx (12*sigma/200). That's the area you sum up:
from numpy import *
import numpy as np
import matplotlib.pyplot as plt
sigma = 4.1
x = np.linspace(-6*sigma, 6*sigma, 200)
def distr(n):
def g(x):
return (1/(sigma*sqrt(2*pi)))*exp(-0.5*(x/sigma)**2)
FxSum = 0
a = list()
for i in range(n):
# divide into 200 parts and sum one by one
numb = g(-6*sigma + (12*sigma*i)/n) * (12*sigma/200)
FxSum += numb
a.append(FxSum)
return a
plt.plot(x, distr(len(x)))
plt.show()

do same calculation over and over in loop with changing variable each time

I want to run this code with several x values and get all the outputs in a list. First run x should be 1, next loop x should be 2, then 3 etc... Is there an easy way to implement this in my code?
EDIT: The loop is now working after i added:
for x in range(1, max_value):
Is there an way I can make a list of the outputs for the degrees of freedom for each loop?
https://imgur.com/eQxHzHZ
import numpy as np
import math
from scipy.stats import skew, kurtosis, kurtosistest
import matplotlib.pyplot as plt
from scipy.stats import norm,t
import pandas as pd
data = pd.read_excel(r"filename.xlsx",sheet_name,skiprows=x+5,usecols="C")
ret = np.array(data.values)
from scipy.stats import skew, kurtosis
X = np.random.randn(10000000)
print(skew(X))
print(kurtosis(X, fisher=False))
# N(x; mu, sig) best fit (finding: mu, stdev)
mu_norm, sig_norm = norm.fit(ret)
dx = 0.0001 # resolution
x = np.arange(-0.1, 0.1, dx)
pdf = norm.pdf(x, mu_norm, sig_norm)
print("Integral norm.pdf(x; mu_norm, sig_norm) dx = %.2f" % (np.sum(pdf*dx)))
print("Sample mean = %.5f" % mu_norm)
print("Sample stdev = %.5f" % sig_norm)
print()
df = pd.DataFrame(ret)
# Student t best fit (finding: nu)
x = t.fit(ret)
nu, mu_t, sig_t = x
pdf2 = t.pdf(x, nu, mu_t, sig_t)
print("Integral t.pdf(x; mu, sig) dx = %.2f" % (np.sum(pdf2*dx)))
print("nu = %.2f" % nu)
print()
You can use a for loop :
for x in range(n):
f(x)
will call the function f on x with x=0, x=1, all the way to x=n-1.
Put the whole code in a for loop that increments x each time.
for x in range(1, max_value):
#do stuff
#add value to a list
print(your_list)
Side note: maybe add all your imports at the beginning, before any scripts start
EDIT x2: as x is overwritten, do
my_list = []
for my_var in range(1, max_value):
x = my_var
#do stuff with x
#add value to a list
my_list.append(x)
print(my_list)

How to plot confidence intervals for stattools ccf function?

I am computing the cross-correlation function using ccf from statsmodels. It works fine except I can't see how to also plot the confidence intervals. I notice that acf seems to have much more functionality. Here is a toy example just to have something to see:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.tsa.stattools as stattools
def create(n):
x = np.zeros(n)
for i in range(1, n):
if np.random.rand() < 0.9:
if np.random.rand() < 0.5:
x[i] = x[i-1] + 1
else:
x[i] = np.random.randint(0,100)
return x
x = create(4000)
y = create(4000)
plt.plot(stattools.ccf(x, y)[:100])
This gives:
Unfortunately, the confidence interval is not provided by the statsmodels cross-correlation function (ccf). In R the ccf() would also print the confidence interval.
Here, we need to calculate the confidence interval by ourself and plot it out afterwards. The confidence interval is here computed as 2 / np.sqrt(lags). For basic info on confidence intervals for cross-correlation refer to:
Stats StackExchange answer by Rob Hyndman: https://stats.stackexchange.com/a/3128/43304
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.tsa.stattools as stattools
def create(n):
x = np.zeros(n)
for i in range(1, n):
if np.random.rand() < 0.9:
if np.random.rand() < 0.5:
x[i] = x[i-1] + 1
else:
x[i] = np.random.randint(0,100)
return x
x = create(4000)
y = create(4000)
lags= 4000
sl = 2 / np.sqrt(lags)
plt.plot(x, list(np.ones(lags) * sl), color='r')
plt.plot(x, list(np.ones(lags) * -sl), color='r')
plt.plot(stattools.ccf(x, y)[:100])
This leads to the following plot with the additional red lines:

Adding samples on the same figure with out deleting the previous samples using bokeh

I want to plot large data in one graph sample by sample. The newly sampled data should be added to the previous samples on the same plot. I have tried the code below, but it doesn't work as intended. It only displays the current sample; it does not display all the previous samples. Any idea how to do it using bokeh?
import numpy as np
import time
from bokeh.plotting import *
from bokeh.session import Session
from random import randint
from bokeh.objects import GLyph,GridPlot, HoverTool
output_notebook(url="default")
#output_notebook()
figure(x_range=[0,1000000],y_range=[0,100],plot_width=1000,plot_height=600,title="Hello World!")
hold()
xs = []
ys = []
x = []
y = []
for num in range(0,1000000):
xs.append(num)
for num in range(0,1000000):
ys.append(randint(0,100))
scatter(x, y, color='#33A02C', fill_color=None, size=8)
renderer = [r for r in curplot().renderers if isinstance(r, Glyph)][0]
ds = renderer.data_source
show()
i = 0
iteration = 0
last = len(xs)
sampleSize = 1000
while(iteration <= len(xs)/sampleSize):
x = xs[i:i+sampleSize]
y = ys[i:i+sampleSize]
ds.data["x"] = x
ds.data["y"] = y
ds._dirty = True
cursession().store_objects(ds)
iteration = iteration + 1
i = i + sampleSize

Categories