improve fit of polyval and polyfit - python

Can anyone help me to get a better fit on this curve? The plot shows test data for compression on an elastomer. The blue dots are the data, the red line is the 3rd order fit. If I increase the order of the fit the situation doesn't improve much.
I can supply the test data file if desired (There are 236 rows and 3 columns).
My code is given here:
import csv
import numpy as np
from matplotlib import pyplot as plt
import numpy.polynomial.polynomial as poly
from itertools import islice
with open('neoprene1.csv') as f:
readCSV = csv.reader(f, delimiter=',')
Cs = []
Rs = []
for row in islice(readCSV,1,None):
R = row[1] if row[1] != '' else 0.0 # Strain
C = row[2] if row[2] != '' else 0.0 # Stress
Rs.append(R)
Cs.append(C)
q = np.array([Rs],dtype = float).transpose()
s = np.array([Cs],dtype = float).transpose()
q1 = q[:,0]
s1 = s[:,0]
plt.cla()
z = poly.polyfit(q1,s1,3)
zz = poly.polyval(q,z)
plt.title('Neoprene True Stress-Strain')
plt.xlabel('Strain (%)')
plt.ylabel('Stress (MPa)')
aa = plt.plot(s1,zz,'r-', label = 'Fitting function')
bb = plt.plot(s1,q1,'bo', label = 'Raw data')

Related

Matplotlib is making positives into negatives

I'm just trying to graph some simple data and whether I try to do it with plot or subplot it comes out the same. All values in my lists are positive but the y axis is acting like a number line with only positives.
import matplotlib.pyplot as plt
xVal = []
yVal1 = []
yVal2 = []
yVal3 = []
data = []
# load data
with open(r"path", 'r') as f:
data = f.readlines()
yVal1 = data[0].split(",")
yVal2 = data[1].split(",")
yVal3 = data[2].split(",")
del yVal1[-1]
del yVal2[-1]
del yVal3[-1]
print(yVal1)
print(yVal2)
print(yVal3)
# graph dem bois
xVal = [*range(0, len(yVal1))]
'''fig, ax = plt.subplots(3)
ax[0].plot(xVal, yVal1)
ax[0].set_title("pm5")
ax[1].plot(xVal, yVal2)
ax[1].set_title("pm7.5")
ax[2].plot(xVal, yVal3)
ax[2].set_title("pm10")
fig.suptitle("Particulate Levels over time")'''
plt.plot(xVal, yVal3)
plt.show()
As per the comment by Jody Klymak I converted the string lists into float lists and it worked.
fyVal1 = [float(x) for x in yVal1]

Clustering near Lines using coordinates in Python

I have a list with x- and y-coordinates of start and Endpoints of some lines.Lines as csv
331,178,486,232
185,215,386,308
172,343,334,419
406,128,570,165
306,106,569,166
159,210,379,299
236,143,526,248
303,83,516,178
409,62,572,106
26,287,372,427
31,288,271,381
193,228,432,330
120,196,432,329
136,200,374,297
111,189,336,289
284,186,560,249
333,202,577,254
229,194,522,219
349,111,553,165
121,322,342,416
78,303,285,391
103,315,340,415
The lines look like this on my example image. Lines plotted
I want to group lines which are close to each other into clusters and create one line for each cluster. For this example i would like to have 5 clusters. After that i want to calculate the distance from each clusterline to the next.
import csv, math
file = open("lines.csv")
csvreader = csv.reader(file)
lines = []
for data in csvreader:
lines.append({'x1':int(data[0]), 'y1':int(data[1]), 'x2':int(data[2]), 'y2':int(data[3])})
def point_delta(p1, p2):
return abs(p1 - p2)
for line in lines[:2]:
for line_rev in lines:
#x_start_delta = abs(line['x1'] - line_rev['x1'])
x_start_delta = point_delta(line['x1'], line_rev['x1'])
y_start_delta = abs(line['y1'] - line_rev['y1'])
start_distance = math.sqrt(x_start_delta**2 + y_start_delta**2)
x_end_delta = abs(line['x2'] - line_rev['x2'])
y_end_delta = abs(line['y2'] - line_rev['y2'])
end_distance = math.sqrt(x_end_delta**2 + y_end_delta**2)
avg_distance = (start_distance + end_distance)/2
cluster = 0
if avg_distance < 100:
print(f"distance: {avg_distance}")
print("############## next line ##############")
I have written some code to calculate the distance between each line but cant find a way to save the lines which are near to each other in different lists.
Does somebody know how to do this or is there another way to create clusters? Im also thinking about using the middlepoint instead of the start-/endpoint
You could throw a clustering on it, but it has trouble with the lonely line at the end
data = [[331,178,486,232],
[185,215,386,308],
[172,343,334,419],
[406,128,570,165],
[306,106,569,166],
[159,210,379,299],
[236,143,526,248],
[303,83,516,178],
[409,62,572,106],
[26,287,372,427],
[31,288,271,381],
[193,228,432,330],
[120,196,432,329],
[136,200,374,297],
[111,189,336,289],
[284,186,560,249],
[333,202,577,254],
[229,194,522,219],
[349,111,553,165],
[121,322,342,416],
[78,303,285,391],
[103,315,340,415]]
import pandas as pd
import sklearn
from sklearn.cluster import MiniBatchKMeans
import numpy as np
lines = pd.DataFrame(data)
CLUSTERS = 5
X = lines.values
kmeans = MiniBatchKMeans(n_clusters=CLUSTERS,max_no_improvement=100).fit(X)
import numpy as np
import pylab as pl
from matplotlib import collections as mc
lines_segments = [ [ (l[0],l[1]),([l[2],l[3]]) ] for l in lines.values]
center_segments = [ [ (l[0],l[1]),([l[2],l[3]]) ] for l in kmeans.cluster_centers_]
line_collection = mc.LineCollection(lines_segments, linewidths=2)
centers = mc.LineCollection(center_segments, colors='red', linewidths=4, alpha=1)
fig, ax = pl.subplots()
ax.add_collection(line_collection)
ax.add_collection(centers)
ax.autoscale()
ax.margins(0.1)
You can see the centers with
kmeans.cluster_centers_

Plotting Results from For Iteration

I am new to python and I want to ask how to plot a figure from for loop iteration?
Here is the code!
import numpy as np #numerical python
import matplotlib.pyplot as plt #python plotting
from math import exp #exponential math directory
T_initial = 293
T_reference = range(298,340,2)
R1_initial = 57500
R2_initial = 13300
R3_initial = 18000
R4_initial = 5600
Beta = 4150
Vin = 2.8
for i in T_reference:
R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
print(i,Vo_round)
You can plot the data like this:
for i in T_reference:
R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
plt.scatter(i, Vo_round)
plt.show()
Is this what you were looking for?
Put the values of the items you want to plot into two different arrays using the 'append' method (one for the 'x' axis and one for the 'y' axis).
Then just plot the graph with the matplotlib
It should be something like the below:
is1 = list()
vos = list()
for i in T_reference:
R1_refe = R1_initial*exp(Beta*((1/i)-(1/T_initial)))
Rs = (R2_initial/(R2_initial+ R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
print(i,Vo_round)
is1.append(i)
vos.append(Vo_round)
plt.plot(is1,vos)
Here is a reference for plotting
Two options without a for-loop
Create a function
def v_o(T_reference):
T_initial = 293
R1_initial = 57500
R2_initial = 13300
R3_initial = 18000
R4_initial = 5600
Beta = 4150
Vin = 2.8
R1_refe = R1_initial*exp(Beta*((1/T_reference)-(1/T_initial)))
Rs = (R2_initial/(R2_initial + R1_refe)) - (R4_initial/(R3_initial+R4_initial))
Vo = Vin*Rs
Vo_round = round(Vo, 3)
return Vo_round
Option 1: Use a pandas dataframe
import pandas as pd
import matplotlib.pyplot as plt
# create the dataframe with T_reference
df = pd.DataFrame({'t_ref': [*range(298, 340,2)]})
# Call the function to calculate v_o
df['v_o'] = df.t_ref.apply(v_o)
# plot
df.plot('t_ref', 'v_o', legend=False)
plt.show()
Option 2: use map
T_reference = [*range(298, 340,2)]
v_o = list(map(v_o, T_reference))
plt.plot(T_reference, v_o)
plt.show()
Plot
The plot from both options looks like the following

Plot a graph by reading coulmns from CSV

I have a csv file which contains four columns. The first column in time, the second, third and fourth columns are Accelerometer readings. I want to plot Time on X-Axis and the Accelerometer reading on Y-Axis.
Sample Data:
0 1.0969 9.7721 0.614
20 1.1146 9.7501 0.7444
40 1.1146 9.7501 0.7444
60 1.0124 9.7151 0.7169
79 1.0124 9.7151 0.7169
100 1.0927 9.7324 0.7356
120 1.0927 9.7324 0.7356
Here is what I have so far.
from numpy import genfromtxt
import csv
import matplotlib.pyplot as plt
#import numpy as np
# Open the desired file for reading
f = open('walk-shoe.csv', "rb")
# create a object of csv class and read the file
# use ',' as a delimiter
reader = csv.reader(f, delimiter=',')
time_row = 0
accel_1_row = 0
accel_2_row = 0
accel_3_row = 0
time = []
accel_1 = []
accel_2 = []
accel_3 = []
# create a list of 'Time in ms'
for row in reader:
# Skip the first row
time_row = time_row + 1
if time_row == 1:
continue
time.append(row[0])
accel_1.append(row[1])
accel_2.append(row[2])
accel_3.append(row[3])
# print the contents of the list
# print time
#print accel_1
#print accel_2
#print accel_3
# append all the list accelerometer list together
final_accel = []
final_accel.append(accel_1)
final_accel.append(accel_2)
final_accel.append(accel_3)
#print final_accel
# plot the graph
for i in range(len(final_accel)):
plt.plot(time,[pt[i] for pt in final_accel],label = 'id %s'%i)
plt.legend()
plt.show()
I want to plot all the sensor readings on one graph on y axis and time in x axis
You seem to be importing numpy in the code you give, therefore I will take that to mean that library is available to you. Numpy lets you read in data very easily using numpy.loadtxt().
You can then create a for loop which goes through columns 1 to 3 and plots data against column 0 (time).
import numpy as np
import matplotlib.pyplot as plt
data = np.loadtxt('walk-shoe.csv', delimiter=',', dtype=float)
print (data)
#[[ 0. 1.0969 9.7721 0.614 ]
# [ 20. 1.1146 9.7501 0.7444]
# [ 40. 1.1146 9.7501 0.7444]
# [ 60. 1.0124 9.7151 0.7169]
# [ 79. 1.0124 9.7151 0.7169]
# [ 100. 1.0927 9.7324 0.7356]
# [ 120. 1.0927 9.7324 0.7356]]
for i in range(1,data.shape[1]):
plt.plot(data[:,0], data[:,i], label='id %s' %i)
plt.legend()
plt.show()

KDE (kernel density estimation) of Matrix with 13 dimensions using numpy and matplotlib

I keep getting these errors:
Traceback (most recent call last): File "D:/Dropbox/Public/Data Processor/src/dP.py", line 69, in <module>
gkde = stats.gaussian_kde(kdeData) File "D:\Python27\lib\site-packages\scipy\stats\kde.py", line 86, in
__init__
self._compute_covariance() File "D:\Python27\lib\site-packages\scipy\stats\kde.py", line 339, in
_compute_covariance
self.inv_cov = linalg.inv(self.covariance) File "D:\Python27\lib\site-packages\scipy\linalg\basic.py", line 327, in inv
raise LinAlgError("singular matrix") numpy.linalg.linalg.LinAlgError: singular matrix
I'm not sure how this applies to my data. It's a huge wall of text but if it helps to at least see what context the code is being applied in here it is http://pastebin.com/Myx5TpYy. Each matrix has 12 data points in it, to be honest I'm not sure if I'll need all the data points but I think getting to know what's going wrong here will help me out either way.
Here is the code I've been trying to get to work
from decimal import *
import csv
import numpy as np
from scipy import stats
import matplotlib.pylab as plt
matrix = []
col1 = []
col2 = []
col3 = []
col4 = []
col5 = []
col6 = []
col7 = []
col8 = []
col9 = []
col10 = []
col11 = []
col12 = []
for line in open("data.txt", "r"):
col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10, col_11, col_12 = line.split()
col_1_val = col_1[:]
col_2_val = col_2[:]
col_3_val = col_3[:]
col_4_val = col_4[:]
col_5_val = col_5[:]
col_6_val = col_6[:]
col_7_val = col_7[:]
col_8_val = col_8[:]
col_9_val = col_9[:]
col_10_val = col_10[:]
col_11_val = col_11[:]
col_12_val = col_12[:]
matrix.append([Decimal(col_1_val), Decimal(col_2_val), Decimal(col_3_val), Decimal(col_4_val), Decimal(col_5_val), Decimal(col_6_val), Decimal(col_7_val), Decimal(col_8_val), Decimal(col_8_val), Decimal(col_9_val), Decimal(col_10_val), Decimal(col_11_val), Decimal(col_12_val)])
kdeData = np.array(matrix).T
print kdeData
gkde = stats.gaussian_kde(kdeData)
ind = np.linspace(-13,13,512)
kdepdf = gkde.evaluate(matrix)
plt.figure()
plt.hist(xn, bins=20, normed=1)
plt.plot(ind, stats.norm.pdf(ind), color="r", label='DGP normal')
plt.plot(in, kdepdf, label='kde', color="g") plt.title('Kernel Density Estimation')
plt.legend()
plt.show()
It seems that there are two completely zero columns in the input matrix. This produces a big band of zeros in the internal covariance matrix calculated by gaussian_kde, making it singular and causing the routine to fail.
If I rewrite your example like this:
import numpy as np
from scipy import stats
import matplotlib.pylab as plt
valid=[0,1,2,3,4,5,6,7,10,11]
matrix=np.loadtxt('data.txt',skiprows=1,usecols=valid)
kdeData = np.array(matrix).T
print kdeData
gkde = stats.gaussian_kde(kdeData)
ind = np.linspace(-13,13,512)
kdepdf = gkde.evaluate(kdeData)
plt.figure()
plt.plot(ind, stats.norm.pdf(ind), color="r", label='DGP normal')
plt.plot(ind, kdepdf, label='kde', color="g")
plt.title('Kernel Density Estimation')
plt.legend()
plt.show()
It works:
First, you are doing far, far too much work to get the matrix. Replace everything from the line matrix = [] to the end of the for loop with:
matrix = []
for line in open("data.txt", "r"):
matrix.append([Decimal(e) for e in line[:-1].split()])
Secondly, the reason for the "singular matrix" error depends entirely on your data. For example, do you have a row of entirely the same value (say, all 0's or all 1's)? Alternatively, do you have two rows that are identical? Either of these would lead to this problem using the kernel density estimator.

Categories