I have a dataframe like this:
import pandas as pd
import numpy as np
time = pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='5T')
mile = np.linspace(0,100,10)
x = list(time)*len(mile)
y = np.repeat(mile,len(time))
z = []
for i in range(0,10,1):
    z.extend(np.random.normal(loc=i*5, scale=5, size=13))
origin_data = pd.DataFrame({'x':x, 'y':y ,'z':z})
origin_data contains the original points' positions (x and y) and their values (z). I want to interpolate the z values at these new positions: x = pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='1T') with y = np.arange(0,91,1), using just bilinear interpolation.
I read the official documentation for scipy.interpolate.interp2d, but its x values are numeric while mine are datetimes. Also, the tutorial's z values are computed on a grid, whereas mine are already given, so I don't know how to order the input z values. Could anyone give me an example, including a plot of the interpolation result, based on the dataframe I provided above? Thank you for your attention!
This is the solution I found to this question:
import pandas as pd
import numpy as np
from scipy import interpolate
import itertools
time = pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='5T')
mile = np.arange(0,100,10)
x = list(time)*len(mile)
y = np.repeat(mile,len(time))
z = []
for i in range(0,10,1):
    z.extend(np.random.normal(loc=i*5, scale=5, size=13))
origin_data = pd.DataFrame({'x':x, 'y':y ,'z':z})
from ggplot import *
ggplot(aes(x='x', y='y', colour='z'), data=origin_data) +\
    geom_point(size=100) +\
    scale_x_date(labels=date_format("%Y-%m-%d %H:%M:%S"))
x_numeric = [x.timestamp() for x in origin_data['x']]
x_cors = pd.unique(x_numeric)
y_cors = pd.unique(origin_data['y'])
# Build the (x, y) pairs in the same order as z (grouped by y, then x), so each pair lines up with its z value
cors = [(t, m) for m in y_cors for t in x_cors]
interp_func = interpolate.LinearNDInterpolator(cors, z)
# interp_func = interpolate.CloughTocher2DInterpolator(cors, z)  # smoother, non-linear alternative
new_x = [x.timestamp() for x in pd.date_range('2018-05-14 00:00:00','2018-05-14 01:00:00',freq='1T')]
new_y = np.arange(0,91,1)
new_cors = list(itertools.product(new_x,new_y))
new_z = interp_func(new_cors)
new_data = pd.DataFrame({'x': [c[0] for c in new_cors],
                         'y': [c[1] for c in new_cors],
                         'z': new_z})
new_data['x'] = [pd.Timestamp(ts, unit='s') for ts in new_data['x']]
ggplot(aes(x='x', y='y', colour='z'), data=new_data) +\
    geom_point(size=100) +\
    scale_x_date(labels=date_format("%Y-%m-%d %H:%M:%S"))
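Since the ggplot package used above is no longer maintained, here is a minimal sketch of the same two scatter plots with matplotlib (assuming origin_data and new_data are built as above; the colormap choice is just an example):
import matplotlib.pyplot as plt
# Original points coloured by z, next to the interpolated grid
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
sc1 = ax1.scatter(origin_data['x'], origin_data['y'], c=origin_data['z'], cmap='viridis')
ax1.set_title('Original points')
fig.colorbar(sc1, ax=ax1, label='z')
sc2 = ax2.scatter(new_data['x'], new_data['y'], c=new_data['z'], cmap='viridis', s=4)
ax2.set_title('Bilinear interpolation result')
fig.colorbar(sc2, ax=ax2, label='z')
fig.autofmt_xdate()  # tilt the datetime tick labels so they stay readable
plt.show()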
I would like to do the same interpolation as MATLAB in Python with scipy. Here is an example of my code.
This is what I have in MATLAB :
x = linspace(-10,10,10);
y = linspace(-5,5,10);
DATA = rand(10,10);
[XX,YY] = ndgrid(x,y);
XX2 = XX;
YY2 = YY;
DATA2 = interpn(XX,YY,DATA,XX2,YY2);
I tried to do it in Python, but it seems difficult to do with matrices in meshgrid format.
import numpy as np
import scipy.interpolate
x = np.linspace(-10,10,10)
y = np.linspace(-5,5,10)
DATA = np.random.rand(10,10)
[XX,YY] = np.meshgrid(x,y,indexing='ij')
XX2 = XX
YY2 = YY
DATA2 = scipy.interpolate.interpn(XX,YY,DATA,XX2,YY2) # NOT WORKING
Any ideas on how to solve this issue?
I found the solution. Here is the code in Python with SciPy:
import numpy as np
import scipy.interpolate
x = np.linspace(-10,10,10)
y = np.linspace(-5,5,10)
DATA = np.random.rand(10,10)
[XX,YY] = np.meshgrid(x,y,indexing='ij')
XX2 = XX
YY2 = YY
gridInitial = (x,y)
gridToInterpolate = np.stack((XX2.ravel(),YY2.ravel()),axis=1)
DATA2 = scipy.interpolate.interpn(gridInitial,DATA,gridToInterpolate,method='linear',bounds_error=False,fill_value=0)
DATA2 = DATA2.reshape(XX2.shape)
gridToInterpolate is just an array of all the points of the new grid, so you only have to reshape the result at the end.
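For the linear case, the same result can also be obtained with scipy.interpolate.RegularGridInterpolator, which is built once from the original grid and can then be evaluated at arbitrary points. A minimal sketch under the same setup as above:
import numpy as np
from scipy.interpolate import RegularGridInterpolator
x = np.linspace(-10,10,10)
y = np.linspace(-5,5,10)
DATA = np.random.rand(10,10)
# Build the interpolator from the grid axes and the gridded values
interp = RegularGridInterpolator((x, y), DATA, method='linear', bounds_error=False, fill_value=0)
# Evaluate on the same grid as before; any (n, 2) array of points works
XX2, YY2 = np.meshgrid(x, y, indexing='ij')
points = np.stack((XX2.ravel(), YY2.ravel()), axis=1)
DATA2 = interp(points).reshape(XX2.shape)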
I did a logistic regression on my data, and now I have the best theta array for classifying new data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def h_theta(x,theta):
    return np.dot(x,np.transpose(theta))
def g_z(x,theta):
    # Sigmoid (logistic) function
    return 1/(1+pow(np.e,-h_theta(x,theta)))
def cost_function(x,y,theta):
    cost = 0
    for i in range(len(y)):
        l = np.log(g_z(x[i],theta))
        cost += -y[i]*l - (1-y[i])*np.log(1-g_z(x[i],theta))
    return cost/(2*len(y))
def updata_theta(x,y,theta,alpha):
    for i in range(6):
        u = 0
        for j in range(len(y)):
            # The logistic-cost gradient uses the sigmoid output g_z, not the raw linear term
            u += (g_z(x[j],theta)-y[j])*x[j,i]
        theta[0,i] -= alpha*u/len(y)
data = pd.read_csv(r"D:\REZA\programming\machine learning-andrew ng\coding\machine-learning-ex2\ex2\ex2data2.csv")
y = np.array(data["1"])
s = np.array(data.drop("1",axis=1))
x1T2 = np.zeros((117,1))
x2T2 = np.zeros((117,1))
x1x2 = np.zeros((117,1))
one = np.ones((117,1))
m = len(y)
for i in range(m):
    x1T2[i] = s[i,0]*s[i,0]
    x2T2[i] = s[i,1]*s[i,1]
    x1x2[i] = s[i,0]*s[i,1]
x = np.append(one,s,axis=1)
f = np.append(x1T2,x2T2,axis=1)
f = np.append(f,x1x2,axis=1)
x = np.append(x,f,axis=1)
x = np.array(x,dtype=float)
theta = np.zeros((1,6),dtype=float)
n=0
alpha = 0.003
while(n<100 and cost_function(x,y,theta)>0.01):
    updata_theta(x,y,theta,alpha)
    n += 1
I can plot my data with plt.scatter
plt.scatter(x[:,1],x[:,2],c=y)
plt.show()
scatter plot output
Now I want to plot the decision boundary using this theta array, but I don't know how to do it.
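A minimal sketch of one way to do it: evaluate the hypothesis on a grid over the two original features, rebuilding the same six polynomial terms used for x, and draw the contour where g_z equals 0.5. The grid names below are illustrative, and it assumes the x, y, theta, and g_z defined above:
import matplotlib.pyplot as plt
grid_x1, grid_x2 = np.meshgrid(np.linspace(x[:,1].min(), x[:,1].max(), 200),
                               np.linspace(x[:,2].min(), x[:,2].max(), 200))
# Same feature order as in training: [1, x1, x2, x1^2, x2^2, x1*x2]
grid_features = np.column_stack([np.ones(grid_x1.size),
                                 grid_x1.ravel(),
                                 grid_x2.ravel(),
                                 grid_x1.ravel()**2,
                                 grid_x2.ravel()**2,
                                 grid_x1.ravel()*grid_x2.ravel()])
probs = g_z(grid_features, theta).reshape(grid_x1.shape)
plt.scatter(x[:,1], x[:,2], c=y)
plt.contour(grid_x1, grid_x2, probs, levels=[0.5], colors='red')  # decision boundary
plt.show()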
When I plot y as a function of t, the values for a do not change; when I print the appended list, I see they are all 0.0. Please help! I'm confused because y as a function of x plots fine. I can't include the actual code, but here is a minimal working example.
import numpy as np
from math import *
from astropy.table import Table
import matplotlib.pyplot as plt
from random import random
x = 0
y = 0
t = 0
h = 0.0100
tf = 40
N=ceil(tf/h)
tnew = t
x_list = [x]
y_list = [y]
t_list = [t]
for i in range(N):
    #while y >= 0:
    tnew = t + h*i
    t = tnew
    print(t)
    #First and second derivatives
    # stuff happens (can't share the code)
    x_new = random()
    y_new = random()
    x = x_new
    y = y_new
    """ appends selected data for ability to plot"""
    x_list.append(x)
    y_list.append(y)
    t_list.append(t)
    #break
""" Plot1"""
plt.plot(t_list,y_list)
plt.show()
""" Plot2"""
plt.plot(x_list,y_list)
plt.show()
For the first plot I just get a vertical line; the second plot is the way it should be.
I'm trying to do a polynomial regression on a CSV file I have (or any other CSV file). I am not sure how to build a matrix that contains my data set. Here is the code I currently have.
from matplotlib.pyplot import *
import numpy as np
import csv
from math import *
f=open("data_setshort.csv", "r")
csv_f = csv.reader(f)
xval = []
yval = []
polyreg = []
for row in csv_f:
    xval.append(row[0])
    yval.append(row[1])
f.close()
x = np.array(xval)
y = np.array(yval)
xlist = [float(i) for i in x]
ylist = [float(i) for i in y]
print(xlist)
print(ylist)
def poly_fit(x,y):
    for i in range(1, len(x)):
        M = np.matrix(x[i],y[i])
    return M
Matrix = poly_fit(xlist,ylist)
print(Matrix)
poly_fit(x, y) is the function I am trying to build to do the polynomial regression.
Maybe I misunderstood exactly what you're trying to do, but if it's fitting a polynomial from continuous x and y values, then this will do it:
import numpy as np
xi = np.random.uniform(-3, 3, 30)
ni = np.random.uniform(0, .4, 30)
coefficients = np.polyfit(xi, ni, 3)
print(coefficients)
Then, to use it to generate y values given new x values:
new_x = 2.5
polynomial = np.poly1d(coefficients)
new_y = polynomial(new_x)
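Applied to the CSV workflow from the question, a sketch might look like the following (this assumes data_setshort.csv has two numeric columns, x then y, with no header row, and that a cubic fit is wanted):
import numpy as np
# Load both columns directly as floats
data = np.loadtxt("data_setshort.csv", delimiter=",")
xlist, ylist = data[:,0], data[:,1]
# Fit a degree-3 polynomial and turn the coefficients into a callable
coefficients = np.polyfit(xlist, ylist, 3)
polynomial = np.poly1d(coefficients)
# Predict y at new x values
new_x = np.linspace(xlist.min(), xlist.max(), 100)
new_y = polynomial(new_x)
print(coefficients)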
I am trying to quickly create a simulated random walk series in pandas.
import pandas as pd
import numpy as np
dates = pd.date_range('2012-01-01', '2013-02-22')
y2 = np.random.randn(len(dates))/365
Y2 = pd.Series(y2, index=dates)
start_price = 100
I would like to build another series that starts at start_price on the first date and grows by the random growth rates.
pseudo code:
P0 = 100
P1 = 100 * exp(Y2)
P2 = P1 * exp(Y2)
This is very easy to do in Excel, but I can't think of a way of doing it in pandas without iterating over a DataFrame/Series, and I keep bumping my head trying that.
I have tried:
p = Y2.apply(np.exp)-1
y = p.cumsum(p)
y.plot()
This should give the cumulatively compounded return since the start.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def geometric_brownian_motion(T=1, N=100, mu=0.1, sigma=0.01, S0=20):
    dt = float(T)/N
    t = np.linspace(0, T, N)
    W = np.random.standard_normal(size=N)
    W = np.cumsum(W)*np.sqrt(dt)  ### standard brownian motion ###
    X = (mu - 0.5*sigma**2)*t + sigma*W
    S = S0*np.exp(X)  ### geometric brownian motion ###
    return S
dates = pd.date_range('2012-01-01', '2013-02-22')
T = (dates.max()-dates.min()).days / 365
N = dates.size
start_price = 100
y = pd.Series(
    geometric_brownian_motion(T, N, sigma=0.1, S0=start_price), index=dates)
y.plot()
plt.show()
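Alternatively, the pseudo-code from the question can be vectorized directly on the Y2 series without any explicit loop. A minimal sketch, assuming the dates, Y2, and start_price defined in the question:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
dates = pd.date_range('2012-01-01', '2013-02-22')
Y2 = pd.Series(np.random.randn(len(dates))/365, index=dates)
start_price = 100
# P_n = start_price * exp(Y2_1) * ... * exp(Y2_n) = start_price * exp(Y2.cumsum())
prices = start_price * np.exp(Y2.cumsum())
prices.plot()
plt.show()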